Compare commits

..

2 Commits

Author SHA1 Message Date
c7be19611a Merge branch 'main' of https://git.scorpi.us/chelsea/Synculous-2 2026-02-16 19:08:19 -06:00
b1bb05e879 Fix knowledge base loader to handle list format embeddings
Handle both dict and list formats in load_knowledge_base function to fix AttributeError
2026-02-16 19:06:31 -06:00
2 changed files with 191 additions and 390 deletions

View File

@@ -1,409 +1,195 @@
"""
bot.py - Discord bot client with session management and command routing
Features:
- Login flow with username/password
- Session management with JWT tokens
- AI-powered command parsing via registry
- Background task loop for polling
"""
import discord
from discord.ext import tasks
import os
import sys
import json import json
import base64 import time
import requests import numpy as np
import bcrypt from openai import OpenAI
import pickle
from bot.command_registry import get_handler, list_registered # --- Configuration ---
import ai.parser as ai_parser CONFIG_PATH = 'config.json'
import bot.commands.routines # noqa: F401 - registers handler KNOWLEDGE_BASE_PATH = 'dbt_knowledge.json'
import bot.commands.medications # noqa: F401 - registers handler
import bot.commands.knowledge # noqa: F401 - registers handler
DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN") class SimpleVectorStore:
API_URL = os.getenv("API_URL", "http://app:5000") """A simple in-memory vector store using NumPy."""
def __init__(self):
self.vectors = []
self.metadata = []
user_sessions = {} def add(self, vectors, metadatas):
login_state = {} self.vectors.extend(vectors)
message_history = {} self.metadata.extend(metadatas)
user_cache = {}
CACHE_FILE = "/app/user_cache.pkl"
intents = discord.Intents.default() def search(self, query_vector, top_k=5):
intents.message_content = True if not self.vectors:
return []
client = discord.Client(intents=intents) # Convert to numpy arrays for efficient math
query_vec = np.array(query_vector)
doc_vecs = np.array(self.vectors)
# Cosine Similarity: (A . B) / (||A|| * ||B||)
# Note: Both vectors must have the same dimension (e.g., 4096)
norms = np.linalg.norm(doc_vecs, axis=1)
def decodeJwtPayload(token): # Avoid division by zero
payload = token.split(".")[1] valid_indices = norms > 0
payload += "=" * (4 - len(payload) % 4) scores = np.zeros(len(doc_vecs))
return json.loads(base64.urlsafe_b64decode(payload))
# Calculate dot product
dot_products = np.dot(doc_vecs, query_vec)
def apiRequest(method, endpoint, token=None, data=None): # Calculate cosine similarity only for valid norms
url = f"{API_URL}{endpoint}" scores[valid_indices] = dot_products[valid_indices] / (norms[valid_indices] * np.linalg.norm(query_vec))
headers = {"Content-Type": "application/json"}
if token: # Get top_k indices
headers["Authorization"] = f"Bearer {token}" top_indices = np.argsort(scores)[-top_k:][::-1]
try:
resp = getattr(requests, method)(url, headers=headers, json=data, timeout=10) results = []
for idx in top_indices:
results.append({
"metadata": self.metadata[idx],
"score": scores[idx]
})
return results
class JurySystem:
def __init__(self):
self.config = self.load_config()
# Initialize OpenRouter Client
self.client = OpenAI(
base_url="https://openrouter.ai/api/v1",
api_key=self.config['openrouter_api_key']
)
self.vector_store = SimpleVectorStore()
self.load_knowledge_base()
def load_config(self):
with open(CONFIG_PATH, 'r') as f:
return json.load(f)
def load_knowledge_base(self):
"""Loads the pre-computed embeddings from the JSON file."""
print(f"Loading knowledge base from {KNOWLEDGE_BASE_PATH}...")
try: try:
return resp.json(), resp.status_code with open(KNOWLEDGE_BASE_PATH, 'r', encoding='utf-8') as f:
except ValueError: data = json.load(f)
return {}, resp.status_code
except requests.RequestException:
return {"error": "API unavailable"}, 503
vectors = []
metadata = []
def loadCache(): for item in data:
try: vectors.append(item['embedding'])
if os.path.exists(CACHE_FILE): metadata.append({
with open(CACHE_FILE, "rb") as f: "id": item['id'],
global user_cache "source": item['source'],
user_cache = pickle.load(f) "text": item['text']
print(f"Loaded cache for {len(user_cache)} users") })
except Exception as e:
print(f"Error loading cache: {e}")
self.vector_store.add(vectors, metadata)
print(f"Loaded {len(vectors)} chunks into vector store.")
def saveCache(): except FileNotFoundError:
try: print(f"Error: {KNOWLEDGE_BASE_PATH} not found. Did you run the embedder script?")
with open(CACHE_FILE, "wb") as f: exit(1)
pickle.dump(user_cache, f) except Exception as e:
except Exception as e: print(f"Error loading knowledge base: {e}")
print(f"Error saving cache: {e}") exit(1)
def retrieve_context(self, query, top_k=5):
print("[1. Retrieving Context...]")
def hashPassword(password): try:
return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8") # --- CRITICAL FIX: Use the EXACT same model as the embedder ---
# Embedder used: "qwen/qwen3-embedding-8b" -> Dimension 4096
# We must use the same here to avoid shape mismatch.
def verifyPassword(password, hashed): response = self.client.embeddings.create(
return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8")) model="qwen/qwen3-embedding-8b",
input=query
def getCachedUser(discord_id):
    """Return the cached credential record for a Discord user, or None."""
    try:
        return user_cache[discord_id]
    except KeyError:
        return None
def setCachedUser(discord_id, user_data):
    """Store a user's credential record in the in-memory cache and persist it.

    Persistence is best-effort: saveCache() swallows and logs its own errors.
    """
    user_cache[discord_id] = user_data
    saveCache()
def negotiateToken(discord_id, username, password):
    """Log the user in against the backend API and cache their credentials.

    Parameters:
        discord_id: Discord user id used as the cache key.
        username / password: plaintext credentials forwarded to /api/login.

    Returns:
        (token, user_uuid) on success, (None, None) on any failure.

    The original implementation duplicated the entire login + cache-write
    flow in two branches; the only difference was whether the stored bcrypt
    hash was reused or recomputed. It also crashed with AttributeError when
    a cache entry had no "hashed_password" (verifyPassword on None).
    """
    # Decide up front whether we can reuse the previously stored bcrypt hash:
    # only when the cached record matches this username AND actually holds a
    # hash that verifies against the supplied password.
    reusable_hash = None
    cached = getCachedUser(discord_id)
    if cached and cached.get("username") == username:
        stored_hash = cached.get("hashed_password")
        if stored_hash and verifyPassword(password, stored_hash):
            reusable_hash = stored_hash

    result, status = apiRequest(
        "post", "/api/login", data={"username": username, "password": password}
    )
    if status == 200 and "token" in result:
        token = result["token"]
        # The JWT 'sub' claim carries the backend user UUID.
        payload = decodeJwtPayload(token)
        user_uuid = payload["sub"]
        setCachedUser(
            discord_id,
            {
                # Reuse the verified hash when available; otherwise hash fresh.
                "hashed_password": reusable_hash or hashPassword(password),
                "user_uuid": user_uuid,
                "username": username,
            },
        )
        return token, user_uuid
    return None, None
async def handleAuthFailure(message):
    """Drop the sender's expired session and prompt them to re-authenticate."""
    # pop() with a default is safe even if the session was already removed.
    user_sessions.pop(message.author.id, None)
    await message.channel.send(
        "Your session has expired. Send any message to log in again."
    )
async def handleLoginStep(message):
discord_id = message.author.id
state = login_state[discord_id]
if state["step"] == "username":
state["username"] = message.content.strip()
state["step"] = "password"
await message.channel.send("Password?")
elif state["step"] == "password":
username = state["username"]
password = message.content.strip()
del login_state[discord_id]
token, user_uuid = negotiateToken(discord_id, username, password)
if token and user_uuid:
user_sessions[discord_id] = {
"token": token,
"user_uuid": user_uuid,
"username": username,
}
registered = ", ".join(list_registered()) or "none"
await message.channel.send(
f"Welcome back **{username}**!\n\n"
f"Registered modules: {registered}\n\n"
f"Send 'help' for available commands."
)
else:
await message.channel.send(
"Invalid credentials. Send any message to try again."
) )
query_emb = response.data[0].embedding
async def sendHelpMessage(message): # Search the vector store
help_msg = """**🤖 Synculous Bot - Natural Language Commands** context_chunks = self.vector_store.search(query_emb, top_k=top_k)
Just talk to me naturally! Here are some examples: return context_chunks
**💊 Medications:** except Exception as e:
"add lsd 50 mcg every tuesday at 4:20pm" print(f"Error retrieving context: {e}")
"take my wellbutrin" return []
"what meds do i have today?"
"show my refills"
"snooze my reminder for 30 minutes"
"check adherence"
**📋 Routines:** def generate_answer(self, query, context_chunks):
"create morning routine with brush teeth, shower, eat" print("[2. Generating Answer...]")
"start my morning routine"
"done" (complete current step)
"skip" (skip current step)
"pause/resume" (pause or continue)
"what steps are in my routine?"
"schedule workout for monday wednesday friday at 7am"
"show my stats"
**💡 Tips:** # Build the context string
• I understand natural language, typos, and slang context_text = "\n\n---\n\n".join([chunk['metadata']['text'] for chunk in context_chunks])
• If I'm unsure, I'll ask for clarification
• For important actions, I'll ask you to confirm with "yes" or "no"
• When you're in a routine, shortcuts like "done", "skip", "pause" work automatically"""
await message.channel.send(help_msg)
system_prompt = """You are a helpful AI assistant specializing in DBT (Dialectical Behavior Therapy).
Use the provided context to answer the user's question.
If the answer is not in the context, say you don't know based on the provided text.
Be concise and compassionate."""
async def checkActiveSession(session): user_prompt = f"""Context:
"""Check if user has an active routine session and return details.""" {context_text}
token = session.get("token")
if not token:
return None
resp, status = apiRequest("get", "/api/sessions/active", token) Question: {query}"""
if status == 200 and "session" in resp:
return resp
return None
try:
# Using a strong model for the final generation
response = self.client.chat.completions.create(
model="openai/gpt-4o-mini", # You can change this to "qwen/qwen-3-8b" or similar if desired
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
temperature=0.7
)
async def handleConfirmation(message, session): return response.choices[0].message.content
"""Handle yes/no confirmation responses. Returns True if handled."""
discord_id = message.author.id
user_input = message.content.lower().strip()
if "pending_confirmations" not in session: except Exception as e:
return False return f"Error generating answer: {e}"
# Check for any pending confirmations def process_query(self, query):
pending = session["pending_confirmations"] # 1. Retrieve
if not pending: context = self.retrieve_context(query)
return False
# Get the most recent pending confirmation if not context:
confirmation_id = list(pending.keys())[-1] return "I couldn't find any relevant information in the knowledge base."
confirmation_data = pending[confirmation_id]
if user_input in ("yes", "y", "yeah", "sure", "ok", "confirm"): # Optional: Print sources for debugging
# Execute the confirmed action print(f" Found {len(context)} relevant chunks (Top score: {context[0]['score']:.4f})")
del pending[confirmation_id]
interaction_type = confirmation_data.get("interaction_type") # 2. Generate
handler = get_handler(interaction_type) answer = self.generate_answer(query, context)
if handler: return answer
# Create a fake parsed object for the handler
fake_parsed = confirmation_data.copy()
fake_parsed["needs_confirmation"] = False
await handler(message, session, fake_parsed)
return True
elif user_input in ("no", "n", "nah", "cancel", "abort"): def main():
del pending[confirmation_id] print("Initializing AI Jury System...")
await message.channel.send("❌ Cancelled.") system = JurySystem()
return True
return False print("\nSystem Ready. Ask a question (or type 'exit').")
while True:
try:
user_query = input("\nYou: ").strip()
async def handleActiveSessionShortcuts(message, session, active_session): if user_query.lower() in ['exit', 'quit']:
"""Handle shortcuts like 'done', 'skip', 'next' when in active session.""" print("Goodbye!")
user_input = message.content.lower().strip() break
# Map common shortcuts to actions if not user_query:
shortcuts = { continue
"done": ("routine", "complete"),
"finished": ("routine", "complete"),
"complete": ("routine", "complete"),
"next": ("routine", "complete"),
"skip": ("routine", "skip"),
"pass": ("routine", "skip"),
"pause": ("routine", "pause"),
"hold": ("routine", "pause"),
"resume": ("routine", "resume"),
"continue": ("routine", "resume"),
"stop": ("routine", "cancel"),
"quit": ("routine", "cancel"),
"abort": ("routine", "abort"),
}
if user_input in shortcuts: response = system.process_query(user_query)
interaction_type, action = shortcuts[user_input] print(f"\nAI: {response}")
handler = get_handler(interaction_type)
if handler:
fake_parsed = {"action": action}
await handler(message, session, fake_parsed)
return True
return False
async def routeCommand(message):
    """Route a logged-in user's DM to the right handler.

    Order matters here: help keywords short-circuit everything, then pending
    yes/no confirmations, then single-word routine shortcuts (only while a
    routine session is active), and finally the AI parser + registry dispatch.
    """
    discord_id = message.author.id
    # Caller (on_message) guarantees a session exists for this user.
    session = user_sessions[discord_id]
    user_input = message.content.lower()
    # NOTE(review): substring match, so any message containing "help"
    # (e.g. "help me add a med") triggers the help text — confirm intended.
    if "help" in user_input or "what can i say" in user_input:
        await sendHelpMessage(message)
        return
    # Check for active session first
    active_session = await checkActiveSession(session)
    # Handle confirmation responses
    confirmation_handled = await handleConfirmation(message, session)
    if confirmation_handled:
        return
    # Handle shortcuts when in active session
    if active_session:
        shortcut_handled = await handleActiveSessionShortcuts(
            message, session, active_session
        )
        if shortcut_handled:
            return
    # Show a typing indicator while the (potentially slow) AI parse runs.
    async with message.channel.typing():
        history = message_history.get(discord_id, [])
        # Add context about active session to help AI understand
        context = ""
        if active_session:
            session_data = active_session.get("session", {})
            routine_name = session_data.get("routine_name", "a routine")
            # API step index is 0-based; show the user a 1-based step number.
            current_step = session_data.get("current_step_index", 0) + 1
            total_steps = active_session.get("total_steps", 0)
            context = f"\n[Context: User is currently in active session for '{routine_name}', on step {current_step} of {total_steps}. They can say 'done', 'skip', 'pause', 'resume', or 'stop'.]"
        parsed = await ai_parser.parse(
            message.content + context, "command_parser", history=history
        )
        # Keep only the last 5 (message, parsed) pairs per user as AI context.
        if discord_id not in message_history:
            message_history[discord_id] = []
        message_history[discord_id].append((message.content, parsed))
        message_history[discord_id] = message_history[discord_id][-5:]
        if "needs_clarification" in parsed:
            await message.channel.send(
                f"I'm not quite sure what you mean. {parsed['needs_clarification']}"
            )
            return
        if "error" in parsed:
            await message.channel.send(
                f"I had trouble understanding that: {parsed['error']}"
            )
            return
        # Dispatch to whichever module registered this interaction type.
        interaction_type = parsed.get("interaction_type")
        handler = get_handler(interaction_type)
        if handler:
            await handler(message, session, parsed)
        else:
            registered = ", ".join(list_registered()) or "none"
            await message.channel.send(
                f"Unknown command type '{interaction_type}'. Registered modules: {registered}"
            )
@client.event
async def on_ready():
    """Startup hook: announce login, warm the credential cache, begin polling."""
    print("Bot logged in as {}".format(client.user))
    loadCache()
    backgroundLoop.start()
@client.event
async def on_message(message):
    """DM-only dispatcher: skip self/guild traffic, walk the login flow, then route."""
    # Ignore our own messages and anything outside a direct-message channel.
    if message.author == client.user or not isinstance(
        message.channel, discord.DMChannel
    ):
        return
    discord_id = message.author.id
    if discord_id in login_state:
        # Mid-login: this message is the next step (username or password).
        await handleLoginStep(message)
    elif discord_id not in user_sessions:
        # No session yet: kick off the login conversation.
        login_state[discord_id] = {"step": "username"}
        await message.channel.send("Welcome! Send your username to log in.")
    else:
        await routeCommand(message)
@tasks.loop(seconds=60)
async def backgroundLoop():
    """Once-a-minute polling hook; intentionally a no-op in the base bot.

    Domain modules are expected to override or extend this loop.
    """
@backgroundLoop.before_loop
async def beforeBackgroundLoop():
    """Hold the polling loop until the Discord gateway connection is ready."""
    await client.wait_until_ready()
except KeyboardInterrupt:
print("\nGoodbye!")
break
except Exception as e:
print(f"\nAn error occurred: {e}")
if __name__ == "__main__": if __name__ == "__main__":
client.run(DISCORD_BOT_TOKEN) main()

View File

@@ -53,9 +53,24 @@ def load_knowledge_base(
with open(file_path, "r") as f: with open(file_path, "r") as f:
data = json.load(f) data = json.load(f)
chunks = data.get("chunks", []) # Handle both dict format {"chunks": [...], "embeddings": [...], "metadata": {...}}
embeddings = data.get("embeddings", []) # and legacy list format where data is just the chunks
metadata = data.get("metadata", {}) if isinstance(data, dict):
chunks = data.get("chunks", [])
embeddings = data.get("embeddings", [])
metadata = data.get("metadata", {})
elif isinstance(data, list):
# Legacy format: assume it's just chunks, or list of [chunk, embedding] pairs
if data and isinstance(data[0], dict) and "text" in data[0]:
# Format: [{"text": "...", "embedding": [...]}, ...]
chunks = [item.get("text", "") for item in data]
embeddings = [item.get("embedding", []) for item in data]
metadata = {"format": "legacy_list_of_dicts"}
else:
# Unknown list format - can't process
return None
else:
return None
# Add file_path to metadata for reference # Add file_path to metadata for reference
metadata["_file_path"] = file_path metadata["_file_path"] = file_path