diff --git a/ai/ai_config.json b/ai/ai_config.json index 2c6a005..c6d97aa 100644 --- a/ai/ai_config.json +++ b/ai/ai_config.json @@ -3,8 +3,8 @@ "max_tokens": 8192, "prompts": { "command_parser": { - "system": "You are a helpful AI assistant that parses user commands into structured JSON. Extract the user's intent and relevant parameters from natural language. Return ONLY valid JSON, no explanations.\n\nBe flexible with language - handle typos, slang, and casual phrasing. Consider conversation context when available.\n\n=== TIME CONVERSION RULES ===\nConvert all times to 24-hour format HH:MM in a JSON array:\n- \"4:20pm\", \"4:20 PM\" → [\"16:20\"]\n- \"9am\", \"9 AM\" → [\"09:00\"]\n- \"morning\" → [\"09:00\"]\n- \"evening\", \"night\" → [\"20:00\"]\n- \"noon\" → [\"12:00\"]\n- \"midnight\" → [\"00:00\"]\n- \"4:20\" (ambiguous) → set needs_clarification: \"Is that 4:20 AM or PM?\"\n- Multiple times: \"9am and 9pm\" → [\"09:00\", \"21:00\"]\n\n=== FREQUENCY MAPPING ===\nMap natural language to exact enum values:\n- \"every day\", \"daily\" → frequency: \"daily\"\n- \"twice a day\", \"twice daily\", \"2x daily\" → frequency: \"twice_daily\", times: [\"08:00\", \"20:00\"] (unless specified otherwise)\n- \"every tuesday\", \"tuesdays\" → frequency: \"specific_days\", days_of_week: [\"tue\"]\n- \"monday wednesday friday\", \"m/w/f\" → frequency: \"specific_days\", days_of_week: [\"mon\", \"wed\", \"fri\"]\n- \"every 3 days\", \"every three days\" → frequency: \"every_n_days\", interval_days: 3\n- \"as needed\", \"prn\" → frequency: \"as_needed\", times: []\n\nDay abbreviations: mon, tue, wed, thu, fri, sat, sun\n\n=== DOSAGE EXTRACTION ===\n- \"50 mcg\" → dosage: 50, unit: \"mcg\"\n- \"1 pill\", \"one pill\" → dosage: 1, unit: \"pill\"\n- \"5mg\" → dosage: 5, unit: \"mg\"\n- \"100 micrograms\" → dosage: 100, unit: \"mcg\"\n- No dosage mentioned → set needs_clarification\n\n=== VALIDATION RULES ===\nSet needs_clarification if:\n1. Dosage is missing for 'add' action\n2. 
Time is ambiguous (e.g., just \"4:20\" without AM/PM)\n3. Frequency is unclear (e.g., \"sometimes\", \"often\")\n4. Name cannot be determined\n\n=== INTERACTION TYPES ===\n- \"routine\": habits, routines, activities with steps\n- \"medication\": medications, drugs, supplements, vitamins\n\nAvailable actions:\n- routine: create, create_with_steps, list, start, complete, skip, cancel, pause, resume, steps, schedule, stats, history, delete\n- medication: add, delete, list, take, skip, today, refills, snooze, adherence\n\n=== STEP EXTRACTION FOR ROUTINES ===\nWhen user mentions creating a routine WITH steps:\n- \"create morning routine with brush teeth, shower, eat\"\n → action: \"create_with_steps\", name: \"morning routine\", steps: [\"brush teeth\", \"shower\", \"eat\"], needs_confirmation: true\n- \"add steps to X: A, B, C\" → action: \"add_steps\", routine_name: \"X\", steps: [\"A\", \"B\", \"C\"]\n\n=== DELETE ACTIONS ===\nDelete actions are DESTRUCTIVE and should always require confirmation:\n- \"delete my lsd medication\" → action: \"delete\", name: \"lsd\", needs_confirmation: true\n- \"remove wellbutrin\" → action: \"delete\", name: \"wellbutrin\", needs_confirmation: true\n- \"get rid of vitamin d\" → action: \"delete\", name: \"vitamin d\", needs_confirmation: true", - "user_template": "Parse this command into structured JSON.\n\nCurrent conversation context:\n{history_context}\n\nUser message: \"{user_input}\"\n\nReturn JSON with these exact fields:\n{{\n \"interaction_type\": \"routine\" | \"medication\",\n \"action\": \"string\",\n \"name\": \"string\" (med/routine name),\n \"routine_name\": \"string\" (for step-related actions),\n \"description\": \"string\" (optional),\n \"steps\": [\"step1\", \"step2\"] (for routine creation),\n \"dosage\": number (for meds),\n \"unit\": \"string\" (mg, mcg, pill, etc),\n \"frequency\": \"daily\" | \"twice_daily\" | \"specific_days\" | \"every_n_days\" | \"as_needed\",\n \"times\": [\"HH:MM\"],\n \"days_of_week\": 
[\"mon\", \"tue\", ...],\n \"interval_days\": number (for every_n_days),\n \"needs_confirmation\": boolean (true for destructive/create actions),\n \"confirmation_prompt\": \"string\" (what to ask user),\n \"confidence\": number (0-1),\n \"needs_clarification\": \"string\" (if confidence < 0.8 or missing required fields)\n}}\n\n=== EXAMPLES ===\n\nMedication examples:\n1. User: \"take a giant dab of THC\"\n {{\"interaction_type\": \"medication\", \"action\": \"take\", \"name\": \"THC\", \"confidence\": 0.9}}\n\n2. User: \"add lsd 50 mcg daily at 9am\"\n {{\"interaction_type\": \"medication\", \"action\": \"add\", \"name\": \"lsd\", \"dosage\": 50, \"unit\": \"mcg\", \"frequency\": \"daily\", \"times\": [\"09:00\"], \"confidence\": 0.95}}\n\n3. User: \"add wellbutrin 150 mg twice daily\"\n {{\"interaction_type\": \"medication\", \"action\": \"add\", \"name\": \"wellbutrin\", \"dosage\": 150, \"unit\": \"mg\", \"frequency\": \"twice_daily\", \"times\": [\"08:00\", \"20:00\"], \"confidence\": 0.95}}\n\n4. User: \"add vitamin d on tuesday and saturday at 8am\"\n {{\"interaction_type\": \"medication\", \"action\": \"add\", \"name\": \"vitamin d\", \"dosage\": 1, \"unit\": \"pill\", \"frequency\": \"specific_days\", \"times\": [\"08:00\"], \"days_of_week\": [\"tue\", \"sat\"], \"confidence\": 0.95}}\n\n5. User: \"add lsd every tuesday at 4:20\"\n {{\"interaction_type\": \"medication\", \"action\": \"add\", \"name\": \"lsd\", \"needs_clarification\": \"Is that 4:20 AM or PM?\", \"confidence\": 0.7}}\n\n6. User: \"delete my lsd medication\"\n {{\"interaction_type\": \"medication\", \"action\": \"delete\", \"name\": \"lsd\", \"needs_confirmation\": true, \"confirmation_prompt\": \"Are you sure you want to delete lsd?\", \"confidence\": 0.95}}\n\n7. 
User: \"remove wellbutrin\"\n {{\"interaction_type\": \"medication\", \"action\": \"delete\", \"name\": \"wellbutrin\", \"needs_confirmation\": true, \"confirmation_prompt\": \"Are you sure you want to delete wellbutrin?\", \"confidence\": 0.95}}\n\n8. User: \"which meds do I have?\"\n {{\"interaction_type\": \"medication\", \"action\": \"list\", \"confidence\": 0.95}}\n\n9. User: \"what's my schedule today?\"\n {{\"interaction_type\": \"medication\", \"action\": \"today\", \"confidence\": 0.9}}\n\n10. User: \"any refills due?\"\n {{\"interaction_type\": \"medication\", \"action\": \"refills\", \"confidence\": 0.9}}\n\n11. User: \"snooze my reminder for 30 minutes\"\n {{\"interaction_type\": \"medication\", \"action\": \"snooze\", \"minutes\": 30, \"confidence\": 0.9}}\n\nRoutine examples:\n12. User: \"create a morning routine\"\n {{\"interaction_type\": \"routine\", \"action\": \"create\", \"name\": \"morning routine\", \"needs_confirmation\": true, \"confirmation_prompt\": \"Create routine 'morning routine'?\", \"confidence\": 0.9}}\n\n13. User: \"create morning routine with brush teeth, shower, eat breakfast\"\n {{\"interaction_type\": \"routine\", \"action\": \"create_with_steps\", \"name\": \"morning routine\", \"steps\": [\"brush teeth\", \"shower\", \"eat breakfast\"], \"needs_confirmation\": true, \"confirmation_prompt\": \"Create 'morning routine' with 3 steps?\", \"confidence\": 0.95}}\n\n14. User: \"add steps to morning routine: meditate, journal\"\n {{\"interaction_type\": \"routine\", \"action\": \"add_steps\", \"routine_name\": \"morning routine\", \"steps\": [\"meditate\", \"journal\"], \"confidence\": 0.9}}\n\n15. User: \"what steps are in my morning routine?\"\n {{\"interaction_type\": \"routine\", \"action\": \"steps\", \"name\": \"morning routine\", \"confidence\": 0.9}}\n\n16. User: \"start morning routine\"\n {{\"interaction_type\": \"routine\", \"action\": \"start\", \"name\": \"morning routine\", \"confidence\": 0.9}}\n\n17. 
User: \"done\" (when in active session)\n {{\"interaction_type\": \"routine\", \"action\": \"complete\", \"confidence\": 0.9}}\n\n18. User: \"skip this step\"\n {{\"interaction_type\": \"routine\", \"action\": \"skip\", \"confidence\": 0.9}}\n\n19. User: \"schedule morning routine for monday wednesday friday at 7am\"\n {{\"interaction_type\": \"routine\", \"action\": \"schedule\", \"name\": \"morning routine\", \"days_of_week\": [\"mon\", \"wed\", \"fri\"], \"times\": [\"07:00\"], \"confidence\": 0.9}}\n\n20. User: \"show my routine stats\"\n {{\"interaction_type\": \"routine\", \"action\": \"stats\", \"confidence\": 0.9}}\n\n21. User: \"I want to create a habit called smoke dope\"\n {{\"interaction_type\": \"routine\", \"action\": \"create\", \"name\": \"smoke dope\", \"needs_confirmation\": true, \"confirmation_prompt\": \"Create routine 'smoke dope'?\", \"confidence\": 0.9}}\n\n22. User: \"delete morning routine\"\n {{\"interaction_type\": \"routine\", \"action\": \"delete\", \"name\": \"morning routine\", \"needs_confirmation\": true, \"confirmation_prompt\": \"Are you sure you want to delete morning routine?\", \"confidence\": 0.95}}\n\n23. User: \"add lsd\" (missing dosage)\n {{\"interaction_type\": \"medication\", \"action\": \"add\", \"name\": \"lsd\", \"needs_clarification\": \"What's the dosage for lsd?\", \"confidence\": 0.6}}\n\nIf unclear, set needs_clarification explaining what's needed." + "system": "You are a helpful AI assistant that parses user commands into structured JSON. Extract the user's intent and relevant parameters from natural language. Return ONLY valid JSON, no explanations.\n\nBe flexible with language - handle typos, slang, and casual phrasing. 
Consider conversation context when available.\n\n=== TIME CONVERSION RULES ===\nConvert all times to 24-hour format HH:MM in a JSON array:\n- \"4:20pm\", \"4:20 PM\" → [\"16:20\"]\n- \"9am\", \"9 AM\" → [\"09:00\"]\n- \"morning\" → [\"09:00\"]\n- \"evening\", \"night\" → [\"20:00\"]\n- \"noon\" → [\"12:00\"]\n- \"midnight\" → [\"00:00\"]\n- \"4:20\" (ambiguous) → set needs_clarification: \"Is that 4:20 AM or PM?\"\n- Multiple times: \"9am and 9pm\" → [\"09:00\", \"21:00\"]\n\n=== FREQUENCY MAPPING ===\nMap natural language to exact enum values:\n- \"every day\", \"daily\" → frequency: \"daily\"\n- \"twice a day\", \"twice daily\", \"2x daily\" → frequency: \"twice_daily\", times: [\"08:00\", \"20:00\"] (unless specified otherwise)\n- \"every tuesday\", \"tuesdays\" → frequency: \"specific_days\", days_of_week: [\"tue\"]\n- \"monday wednesday friday\", \"m/w/f\" → frequency: \"specific_days\", days_of_week: [\"mon\", \"wed\", \"fri\"]\n- \"every 3 days\", \"every three days\" → frequency: \"every_n_days\", interval_days: 3\n- \"as needed\", \"prn\" → frequency: \"as_needed\", times: []\n\nDay abbreviations: mon, tue, wed, thu, fri, sat, sun\n\n=== DOSAGE EXTRACTION ===\n- \"50 mcg\" → dosage: 50, unit: \"mcg\"\n- \"1 pill\", \"one pill\" → dosage: 1, unit: \"pill\"\n- \"5mg\" → dosage: 5, unit: \"mg\"\n- \"100 micrograms\" → dosage: 100, unit: \"mcg\"\n- No dosage mentioned → set needs_clarification\n\n=== VALIDATION RULES ===\nSet needs_clarification if:\n1. Dosage is missing for 'add' action\n2. Time is ambiguous (e.g., just \"4:20\" without AM/PM)\n3. Frequency is unclear (e.g., \"sometimes\", \"often\")\n4. 
Name cannot be determined\n\n=== INTERACTION TYPES ===\n- \"routine\": habits, morning routines, task sequences\n- \"medication\": pills, prescriptions, supplements, dosages\n- \"knowledge\": questions about books, asking what a book says, referencing book content", + "user_template": "Parse this command into structured JSON.\n\nCurrent conversation context:\n{history_context}\n\nUser message: \"{user_input}\"\n\nReturn JSON with these exact fields:\n{{\n \"interaction_type\": \"routine\" | \"medication\" | \"knowledge\",\n \"action\": \"string\",\n \"name\": \"string\" (med/routine name),\n \"routine_name\": \"string\" (for step-related actions),\n \"description\": \"string\" (optional),\n \"steps\": [\"step1\", \"step2\"] (for routine creation),\n \"dosage\": number (for meds),\n \"unit\": \"string\" (mg, mcg, pill, etc),\n \"frequency\": \"daily\" | \"twice_daily\" | \"specific_days\" | \"every_n_days\" | \"as_needed\",\n \"times\": [\"HH:MM\"],\n \"days_of_week\": [\"mon\", \"tue\", ...],\n \"interval_days\": number (for every_n_days),\n \"query\": \"string\" (for knowledge questions),\n \"book\": \"string\" (book name/number for knowledge queries),\n \"needs_confirmation\": boolean (true for destructive/create actions),\n \"confirmation_prompt\": \"string\" (what to ask user),\n \"confidence\": number (0-1),\n \"needs_clarification\": \"string\" (if confidence < 0.8 or missing required fields)\n}}\n\n=== EXAMPLES ===\n\nMedication examples:\n1. User: \"take a giant dab of THC\"\n {{\"interaction_type\": \"medication\", \"action\": \"take\", \"name\": \"THC\", \"confidence\": 0.9}}\n\n2. User: \"add lsd 50 mcg daily at 9am\"\n {{\"interaction_type\": \"medication\", \"action\": \"add\", \"name\": \"lsd\", \"dosage\": 50, \"unit\": \"mcg\", \"frequency\": \"daily\", \"times\": [\"09:00\"], \"confidence\": 0.95}}\n\n3. 
User: \"add wellbutrin 150 mg twice daily\"\n {{\"interaction_type\": \"medication\", \"action\": \"add\", \"name\": \"wellbutrin\", \"dosage\": 150, \"unit\": \"mg\", \"frequency\": \"twice_daily\", \"times\": [\"08:00\", \"20:00\"], \"confidence\": 0.95}}\n\nRoutine examples:\n1. User: \"create morning routine with brush teeth, shower, eat\"\n {{\"interaction_type\": \"routine\", \"action\": \"create_with_steps\", \"name\": \"morning\", \"steps\": [\"brush teeth\", \"shower\", \"eat\"], \"confidence\": 0.95}}\n\n2. User: \"start my morning routine\"\n {{\"interaction_type\": \"routine\", \"action\": \"start\", \"name\": \"morning\", \"confidence\": 0.9}}\n\nKnowledge examples:\n1. User: \"what does the book say about time management?\"\n {{\"interaction_type\": \"knowledge\", \"action\": \"query\", \"query\": \"time management\", \"confidence\": 0.9}}\n\n2. User: \"ask atomic habits about habit formation\"\n {{\"interaction_type\": \"knowledge\", \"action\": \"query\", \"book\": \"atomic habits\", \"query\": \"habit formation\", \"confidence\": 0.95}}\n\n3. User: \"list available books\"\n {{\"interaction_type\": \"knowledge\", \"action\": \"list\", \"confidence\": 0.95}}\n\n4. User: \"select book 2\"\n {{\"interaction_type\": \"knowledge\", \"action\": \"select\", \"book\": \"2\", \"confidence\": 0.95}}\n\n5. User: \"what does taking charge of adult adhd say about sleep?\"\n {{\"interaction_type\": \"knowledge\", \"action\": \"query\", \"book\": \"taking charge of adult adhd\", \"query\": \"sleep\", \"confidence\": 0.95}}\n\n6. 
User: \"how do I handle ADHD at work according to the book?\"\n {{\"interaction_type\": \"knowledge\", \"action\": \"query\", \"query\": \"handling ADHD at work\", \"confidence\": 0.9}}" } }, "validation": { diff --git a/bot/bot.py b/bot/bot.py index de5aeb1..0c2f730 100644 --- a/bot/bot.py +++ b/bot/bot.py @@ -20,8 +20,9 @@ import pickle from bot.command_registry import get_handler, list_registered import ai.parser as ai_parser -import bot.commands.routines # noqa: F401 - registers handler +import bot.commands.routines # noqa: F401 - registers handler import bot.commands.medications # noqa: F401 - registers handler +import bot.commands.knowledge # noqa: F401 - registers handler DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN") API_URL = os.getenv("API_URL", "http://app:5000") @@ -217,7 +218,7 @@ async def checkActiveSession(session): token = session.get("token") if not token: return None - + resp, status = apiRequest("get", "/api/sessions/active", token) if status == 200 and "session" in resp: return resp @@ -228,45 +229,45 @@ async def handleConfirmation(message, session): """Handle yes/no confirmation responses. 
Returns True if handled.""" discord_id = message.author.id user_input = message.content.lower().strip() - + if "pending_confirmations" not in session: return False - + # Check for any pending confirmations pending = session["pending_confirmations"] if not pending: return False - + # Get the most recent pending confirmation confirmation_id = list(pending.keys())[-1] confirmation_data = pending[confirmation_id] - + if user_input in ("yes", "y", "yeah", "sure", "ok", "confirm"): # Execute the confirmed action del pending[confirmation_id] - + interaction_type = confirmation_data.get("interaction_type") handler = get_handler(interaction_type) - + if handler: # Create a fake parsed object for the handler fake_parsed = confirmation_data.copy() fake_parsed["needs_confirmation"] = False await handler(message, session, fake_parsed) return True - + elif user_input in ("no", "n", "nah", "cancel", "abort"): del pending[confirmation_id] await message.channel.send("❌ Cancelled.") return True - + return False async def handleActiveSessionShortcuts(message, session, active_session): """Handle shortcuts like 'done', 'skip', 'next' when in active session.""" user_input = message.content.lower().strip() - + # Map common shortcuts to actions shortcuts = { "done": ("routine", "complete"), @@ -283,7 +284,7 @@ async def handleActiveSessionShortcuts(message, session, active_session): "quit": ("routine", "cancel"), "abort": ("routine", "abort"), } - + if user_input in shortcuts: interaction_type, action = shortcuts[user_input] handler = get_handler(interaction_type) @@ -291,7 +292,7 @@ async def handleActiveSessionShortcuts(message, session, active_session): fake_parsed = {"action": action} await handler(message, session, fake_parsed) return True - + return False @@ -306,21 +307,23 @@ async def routeCommand(message): # Check for active session first active_session = await checkActiveSession(session) - + # Handle confirmation responses confirmation_handled = await handleConfirmation(message, 
session) if confirmation_handled: return - + # Handle shortcuts when in active session if active_session: - shortcut_handled = await handleActiveSessionShortcuts(message, session, active_session) + shortcut_handled = await handleActiveSessionShortcuts( + message, session, active_session + ) if shortcut_handled: return async with message.channel.typing(): history = message_history.get(discord_id, []) - + # Add context about active session to help AI understand context = "" if active_session: @@ -329,8 +332,10 @@ async def routeCommand(message): current_step = session_data.get("current_step_index", 0) + 1 total_steps = active_session.get("total_steps", 0) context = f"\n[Context: User is currently in active session for '{routine_name}', on step {current_step} of {total_steps}. They can say 'done', 'skip', 'pause', 'resume', or 'stop'.]" - - parsed = ai_parser.parse(message.content + context, "command_parser", history=history) + + parsed = ai_parser.parse( + message.content + context, "command_parser", history=history + ) if discord_id not in message_history: message_history[discord_id] = [] diff --git a/bot/commands/knowledge.py b/bot/commands/knowledge.py new file mode 100644 index 0000000..8dfa1e0 --- /dev/null +++ b/bot/commands/knowledge.py @@ -0,0 +1,300 @@ +""" +Knowledge base command handler - RAG-powered Q&A from book embeddings +Supports multiple books with user selection +""" + +import os +import json +import glob +import numpy as np +from typing import List, Tuple, Optional, Dict +from pathlib import Path + +from bot.command_registry import register_module +import ai.parser as ai_parser +from ai.parser import client + +# Configuration +EPUBS_DIRECTORY = os.getenv("KNOWLEDGE_EMBEDDINGS_DIR", "../embedding-generator/epubs") +TOP_K_CHUNKS = 5 +EMBEDDING_MODEL = "sentence-transformers/all-minilm-l12-l2" +CHAT_MODEL = "deepseek/deepseek-v3.2" +EMBEDDING_EXTENSION = ".embeddings.json" + +# Cache for loaded embeddings: {file_path: (chunks, embeddings, metadata)} 
_knowledge_cache: Dict[str, Tuple[List[str], List[List[float]], dict]] = {}

# Discord rejects messages whose content is longer than this many characters.
_DISCORD_MESSAGE_LIMIT = 2000


def find_embedding_files() -> List[str]:
    """Return the sorted paths of every embedding file in EPUBS_DIRECTORY.

    The directory is created when it does not exist, so a fresh deployment
    yields an empty list instead of an error.
    """
    os.makedirs(EPUBS_DIRECTORY, exist_ok=True)
    # Escape the directory part so glob metacharacters in the configured path
    # (e.g. "[" or "*") are matched literally.
    pattern = os.path.join(glob.escape(EPUBS_DIRECTORY), f"*{EMBEDDING_EXTENSION}")
    return sorted(glob.glob(pattern))


def get_book_name(file_path: str) -> str:
    """Return a human-readable, title-cased book name for an embeddings path.

    The full EMBEDDING_EXTENSION is removed from the file name. ``Path.stem``
    alone only drops the final ".json", which would leave a trailing
    "Embeddings" word in every title.
    """
    file_name = Path(file_path).name
    if file_name.endswith(EMBEDDING_EXTENSION):
        file_name = file_name[: -len(EMBEDDING_EXTENSION)]
    else:
        file_name = Path(file_name).stem
    return file_name.replace(".", " ").title()


def load_knowledge_base(
    file_path: str,
) -> Optional[Tuple[List[str], List[List[float]], dict]]:
    """Load one embeddings file, caching the result per path.

    Args:
        file_path: Path of the JSON embeddings file.

    Returns:
        A ``(chunks, embeddings, metadata)`` tuple, or ``None`` when the file
        does not exist. ``metadata`` always contains ``"_file_path"``.

    Raises:
        OSError: If the file exists but cannot be read.
        ValueError: If the file is not valid JSON (``json.JSONDecodeError``).
    """
    cached = _knowledge_cache.get(file_path)
    if cached is not None:
        return cached

    if not os.path.exists(file_path):
        return None

    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # "or" guards against explicit nulls in the JSON document.
    chunks = data.get("chunks") or []
    embeddings = data.get("embeddings") or []
    metadata = data.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    # Keep the source path with the metadata for later reference.
    metadata["_file_path"] = file_path

    entry = (chunks, embeddings, metadata)
    _knowledge_cache[file_path] = entry
    return entry


def get_query_embedding(query: str) -> List[float]:
    """Return the embedding vector of *query* produced by EMBEDDING_MODEL."""
    response = client.embeddings.create(model=EMBEDDING_MODEL, input=query)
    return response.data[0].embedding


def cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
    """Return the cosine similarity of two vectors.

    A zero-length vector has no direction, so the similarity is defined as
    0.0 in that case instead of dividing by zero and producing NaN.
    """
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        return 0.0
    return float(np.dot(a, b) / denominator)


def search_context(
    query_embedding: List[float],
    chunks: List[str],
    embeddings: List[List[float]],
    top_k: int = 5,
) -> Tuple[List[str], List[float]]:
    """Return the *top_k* chunks most similar to the query, with their scores.

    Only positions present in both *chunks* and *embeddings* are considered,
    so a file whose two lists differ in length cannot cause an IndexError.
    Both returned lists are ordered from most to least similar.
    """
    # Convert the query once instead of once per chunk.
    query_vec = np.asarray(query_embedding, dtype=float)

    scored = [
        (cosine_similarity(query_vec, emb), i)
        for i, (_chunk, emb) in enumerate(zip(chunks, embeddings))
    ]
    scored.sort(key=lambda pair: pair[0], reverse=True)

    best = scored[: max(top_k, 0)]
    top_chunks = [chunks[i] for _score, i in best]
    top_scores = [score for score, _i in best]
    return top_chunks, top_scores


def generate_answer(query: str, context_chunks: List[str], book_title: str) -> str:
    """Ask CHAT_MODEL to answer *query* using only the supplied book excerpts.

    Never raises: any failure of the chat call is returned as a user-facing
    error string, and an empty completion is reported as such.
    """
    context_text = "\n\n---\n\n".join(context_chunks)

    system_prompt = f"""You are an expert assistant answering questions about "{book_title}".
Answer based strictly on the provided context. If the answer isn't in the context, say you don't know.
Do not make up information. Provide clear, helpful answers based on the book's content.

Context from {book_title}:
{context_text}"""

    try:
        response = client.chat.completions.create(
            model=CHAT_MODEL,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": query},
            ],
            temperature=0.1,
        )
        answer = response.choices[0].message.content
    except Exception as e:
        # Boundary with the remote API: report the failure to the user.
        return f"❌ Error generating answer: {e}"

    return answer or "❌ The model returned an empty answer."


def get_user_selected_book(session) -> Optional[str]:
    """Return the embeddings path stored as the user's selected book, if any."""
    return session.get("knowledge_base", {}).get("selected_book")


def set_user_selected_book(session, file_path: str):
    """Store *file_path* in the session as the user's selected book."""
    session.setdefault("knowledge_base", {})["selected_book"] = file_path


def _resolve_book(identifier, embedding_files: List[str]) -> Optional[str]:
    """Map a 1-based list number or a (partial) book name to an embeddings path.

    Returns ``None`` for an empty identifier, an out-of-range number, or a
    name that matches no file. Names are compared case-insensitively against
    the readable book name and the file name (not its directory).
    """
    needle = "" if identifier is None else str(identifier).strip()
    if not needle:
        # An empty string is a substring of everything; never treat it as a match.
        return None

    try:
        index = int(needle) - 1
    except ValueError:
        index = None

    if index is not None:
        if 0 <= index < len(embedding_files):
            return embedding_files[index]
        return None

    needle = needle.lower()
    for path in embedding_files:
        if needle in get_book_name(path).lower() or needle in Path(path).name.lower():
            return path
    return None


def _split_message(text: str, limit: int = _DISCORD_MESSAGE_LIMIT) -> List[str]:
    """Split *text* into pieces of at most *limit* characters.

    Breaks on the last newline inside the limit when there is one, otherwise
    cuts hard at the limit. Whitespace-only pieces are dropped.
    """
    pieces = []
    remaining = text
    while len(remaining) > limit:
        cut = remaining.rfind("\n", 0, limit + 1)
        if cut <= 0:
            cut = limit
        piece = remaining[:cut]
        if piece.strip():
            pieces.append(piece)
        remaining = remaining[cut:].lstrip("\n")
    if remaining.strip():
        pieces.append(remaining)
    return pieces


async def _send_long(channel, text: str) -> None:
    """Send *text* to *channel*, split across messages if it is too long."""
    for piece in _split_message(text):
        await channel.send(piece)


async def _handle_list(message, session) -> None:
    """Show every available knowledge base and the current selection."""
    embedding_files = find_embedding_files()

    if not embedding_files:
        await message.channel.send(
            f"❌ No knowledge bases found in `{EPUBS_DIRECTORY}`"
        )
        return

    lines = [f"{i + 1}. {get_book_name(f)}" for i, f in enumerate(embedding_files)]
    current = get_user_selected_book(session)
    current_text = (
        f"\n\n📖 Currently selected: **{get_book_name(current)}**" if current else ""
    )

    await _send_long(
        message.channel,
        "📚 **Available Knowledge Bases:**\n"
        + "\n".join(lines)
        + current_text
        + "\n\nUse `ask <question>` or `select book <number>`",
    )


async def _handle_select(message, session, parsed) -> None:
    """Store the book named or numbered in ``parsed["book"]`` as the selection."""
    book_identifier = parsed.get("book", "")
    embedding_files = find_embedding_files()

    if not embedding_files:
        await message.channel.send("❌ No knowledge bases available.")
        return

    selected_file = _resolve_book(book_identifier, embedding_files)
    if not selected_file:
        await message.channel.send(
            f"❌ Could not find book '{book_identifier}'. Use `list books` to see available options."
        )
        return

    set_user_selected_book(session, selected_file)
    book_name = get_book_name(selected_file)
    await message.channel.send(f"✅ Selected knowledge base: **{book_name}**")


async def _handle_query(message, session, parsed) -> None:
    """Answer ``parsed["query"]`` from the requested or currently selected book."""
    query = parsed.get("query", "")
    book_override = parsed.get("book", "")

    if not query:
        await message.channel.send(
            "What would you like to know? (e.g., 'what does the book say about time management?')"
        )
        return

    if book_override:
        # The user named a book in the question itself.
        selected_file = _resolve_book(book_override, find_embedding_files())
        if not selected_file:
            await message.channel.send(
                f"❌ Could not find book '{book_override}'. Use `list books` to see available options."
            )
            return
    else:
        # Fall back to the stored selection, or default to the first book.
        selected_file = get_user_selected_book(session)
        if not selected_file:
            embedding_files = find_embedding_files()
            if embedding_files:
                selected_file = embedding_files[0]
                set_user_selected_book(session, selected_file)

    if not selected_file:
        await message.channel.send(
            "❌ No knowledge base available. Please check the embeddings directory."
        )
        return

    try:
        kb_data = load_knowledge_base(selected_file)
    except (OSError, ValueError) as e:
        # Unreadable or malformed file: tell the user instead of crashing.
        await message.channel.send(f"❌ Error loading knowledge base: {e}")
        return

    if kb_data is None:
        await message.channel.send(
            "❌ Error loading knowledge base. Please check the file path."
        )
        return

    chunks, embeddings, metadata = kb_data
    book_title = metadata.get("title") or get_book_name(selected_file)

    await message.channel.send(f"🔍 Searching **{book_title}**...")

    try:
        # NOTE(review): these client calls appear to be synchronous and would
        # block the event loop while they run; consider an executor.
        query_emb = get_query_embedding(query)
        relevant_chunks, _scores = search_context(
            query_emb, chunks, embeddings, TOP_K_CHUNKS
        )
        answer = generate_answer(query, relevant_chunks, book_title)
        await _send_long(message.channel, f"📚 **Answer:**\n{answer}")
    except Exception as e:
        # Top-level boundary for the query pipeline: surface the error in chat.
        await message.channel.send(f"❌ Error processing query: {e}")


async def handle_knowledge(message, session, parsed):
    """Handle a parsed "knowledge" command.

    Args:
        message: The incoming chat message; replies go to ``message.channel``.
        session: Per-user session mapping; the selected book is stored in it.
        parsed: Parser output. ``"action"`` is one of ``"list"``, ``"select"``
            or ``"query"`` (the default); ``"book"`` and ``"query"`` are read
            by the select and query actions.
    """
    action = parsed.get("action", "query")

    if action == "list":
        await _handle_list(message, session)
    elif action == "select":
        await _handle_select(message, session, parsed)
    elif action == "query":
        await _handle_query(message, session, parsed)
    else:
        await message.channel.send(
            f"Unknown knowledge action: {action}. Try: list, select, or ask a question."
        )


def validate_knowledge_json(data):
    """Validate parsed JSON for knowledge queries.

    Returns a list of error strings; an empty list means the data is accepted.
    A payload carrying an ``"error"`` key is accepted without further checks.
    """
    if not isinstance(data, dict):
        return ["Response must be a JSON object"]

    if "error" in data:
        return []

    errors = []
    if "action" not in data:
        errors.append("Missing required field: action")
    return errors


# Register the module
register_module("knowledge", handle_knowledge)

# Register the validator
ai_parser.register_validator("knowledge", validate_knowledge_json)