From 2a05a80cc278dc548c4c258eeaeba35ec3feffaa Mon Sep 17 00:00:00 2001 From: chelsea Date: Tue, 11 Nov 2025 23:21:50 -0600 Subject: [PATCH] prompt: note safety guardrails --- AppConfig.py | 2 +- prompts/tool_instructions.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/AppConfig.py b/AppConfig.py index 730610c..d53a6a0 100644 --- a/AppConfig.py +++ b/AppConfig.py @@ -49,7 +49,7 @@ defaultPromptRecords = [ }, ] -defaultToolInstructions = """Tooling and JSON actions\n\n1. Only emit JSON when the user confirms they want an action performed.\n2. Wrap every payload in a single fenced ```json block.\n3. Supported payloads today: take_note, store_task, schedule_reminder.\n4. Keep conversational text before or after the block short and clear.\n\nWhen logging a note, output exactly:\n```json\n{\n \"action\": \"take_note\",\n \"note\": \"\"\n}\n```\nSwap in the user's wording (including emojis or punctuation) for the placeholder.\n""" +defaultToolInstructions = """Tooling and JSON actions\n\n1. Only emit JSON when the user confirms they want an action performed.\n2. Wrap every payload in a single fenced ```json block.\n3. Supported payloads today: take_note, store_task, schedule_reminder.\n4. Keep conversational text before or after the block short and clear.\n5. If you respond with a safety/guardrail notice (refusal, policy warning, etc.), append a take_note that captures what was blocked and why.\n\nWhen logging a note, output exactly:\n```json\n{\n \"action\": \"take_note\",\n \"note\": \"\"\n}\n```\nSwap in the user's wording (including emojis or punctuation) for the placeholder.\n""" def ensurePromptAssets(): diff --git a/prompts/tool_instructions.md b/prompts/tool_instructions.md index 4224f09..b0b026d 100644 --- a/prompts/tool_instructions.md +++ b/prompts/tool_instructions.md @@ -4,6 +4,7 @@ 2. Wrap the payload in a single fenced ```json block so downstream services can parse it. 3. 
Supported payloads today: `take_note`, `store_task`, `schedule_reminder`. 4. Keep conversational guidance before/after the block short and clear. +5. If you respond with any safety/guardrail notice (policy refusal, warning, etc.), also emit a `take_note` that states the request you blocked and why, so operators can follow up. When logging a note, output exactly: ```json