Merge branch 'main' of https://git.scorpi.us/chelsea/Synculous-2

Fix knowledge base loader to handle list format embeddings
Handle both dict and list formats in load_knowledge_base function to fix AttributeError
2026-02-16 19:08:19 -06:00 · 2026-02-16 19:06:31 -06:00
2 changed files with 191 additions and 390 deletions
--- a/bot/bot.py
+++ b/bot/bot.py
@@ -1,409 +1,195 @@
 """
 bot.py - Discord bot client with session management and command routing
 Features:
 - Login flow with username/password
 - Session management with JWT tokens
 - AI-powered command parsing via registry
 - Background task loop for polling
 """
 import discord
 from discord.ext import tasks
 import os
 import sys
 import json
-import base64
+import time
-import requests
+import numpy as np
-import bcrypt
+from openai import OpenAI
 import pickle
-from bot.command_registry import get_handler, list_registered
+# --- Configuration ---
-import ai.parser as ai_parser
+CONFIG_PATH = 'config.json'
-import bot.commands.routines  # noqa: F401 - registers handler
+KNOWLEDGE_BASE_PATH = 'dbt_knowledge.json'
 import bot.commands.medications  # noqa: F401 - registers handler
 import bot.commands.knowledge  # noqa: F401 - registers handler
-DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN")
+class SimpleVectorStore:
-API_URL = os.getenv("API_URL", "http://app:5000")
+    """A simple in-memory vector store using NumPy."""
    def __init__(self):
        self.vectors = []
        self.metadata = []
-user_sessions = {}
+    def add(self, vectors, metadatas):
-login_state = {}
+        self.vectors.extend(vectors)
-message_history = {}
+        self.metadata.extend(metadatas)
 user_cache = {}
 CACHE_FILE = "/app/user_cache.pkl"
-intents = discord.Intents.default()
+    def search(self, query_vector, top_k=5):
-intents.message_content = True
+        if not self.vectors:
            return []
-client = discord.Client(intents=intents)
+        # Convert to numpy arrays for efficient math
        query_vec = np.array(query_vector)
        doc_vecs = np.array(self.vectors)
        # Cosine Similarity: (A . B) / (||A|| * ||B||)
        # Note: Both vectors must have the same dimension (e.g., 4096)
        norms = np.linalg.norm(doc_vecs, axis=1)
-def decodeJwtPayload(token):
+        # Avoid division by zero
-    payload = token.split(".")[1]
+        valid_indices = norms > 0
-    payload += "=" * (4 - len(payload) % 4)
+        scores = np.zeros(len(doc_vecs))
    return json.loads(base64.urlsafe_b64decode(payload))
        # Calculate dot product
        dot_products = np.dot(doc_vecs, query_vec)
-def apiRequest(method, endpoint, token=None, data=None):
+        # Calculate cosine similarity only for valid norms
-    url = f"{API_URL}{endpoint}"
+        scores[valid_indices] = dot_products[valid_indices] / (norms[valid_indices] * np.linalg.norm(query_vec))
-    headers = {"Content-Type": "application/json"}
+        
-    if token:
+        # Get top_k indices
-        headers["Authorization"] = f"Bearer {token}"
+        top_indices = np.argsort(scores)[-top_k:][::-1]
        results = []
        for idx in top_indices:
            results.append({
                "metadata": self.metadata[idx],
                "score": scores[idx]
            })
        return results
 class JurySystem:
    def __init__(self):
        """Load settings, build the OpenRouter client, and fill the vector store.

        Reads the configuration via ``load_config``, creates an ``OpenAI``
        client pointed at the OpenRouter endpoint using the configured
        ``openrouter_api_key``, then creates an empty ``SimpleVectorStore``
        and populates it through ``load_knowledge_base``.
        """
        settings = self.load_config()
        self.config = settings

        # OpenRouter is reached through the OpenAI client with a custom base URL.
        self.client = OpenAI(
            api_key=settings['openrouter_api_key'],
            base_url="https://openrouter.ai/api/v1",
        )

        self.vector_store = SimpleVectorStore()
        self.load_knowledge_base()
    def load_config(self):
        """Read the JSON configuration file at ``CONFIG_PATH``.

        Returns:
            The parsed JSON document. ``__init__`` reads the
            ``openrouter_api_key`` entry from it.

        Raises:
            FileNotFoundError: If the file does not exist.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        # Explicit UTF-8 keeps parsing independent of the platform's default
        # locale encoding and matches how the knowledge base file is opened.
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            return json.load(f)
    def load_knowledge_base(self):
        """Loads the pre-computed embeddings from the JSON file."""
        print(f"Loading knowledge base from {KNOWLEDGE_BASE_PATH}...")
        try:
-        resp = getattr(requests, method)(url, headers=headers, json=data, timeout=10)
+            with open(KNOWLEDGE_BASE_PATH, 'r', encoding='utf-8') as f:
-        try:
+                data = json.load(f)
            return resp.json(), resp.status_code
        except ValueError:
            return {}, resp.status_code
    except requests.RequestException:
        return {"error": "API unavailable"}, 503
            vectors = []
            metadata = []
-def loadCache():
+            for item in data:
-    try:
+                vectors.append(item['embedding'])
-        if os.path.exists(CACHE_FILE):
+                metadata.append({
-            with open(CACHE_FILE, "rb") as f:
+                    "id": item['id'],
-                global user_cache
+                    "source": item['source'],
-                user_cache = pickle.load(f)
+                    "text": item['text']
-                print(f"Loaded cache for {len(user_cache)} users")
+                })
            self.vector_store.add(vectors, metadata)
            print(f"Loaded {len(vectors)} chunks into vector store.")
        except FileNotFoundError:
            print(f"Error: {KNOWLEDGE_BASE_PATH} not found. Did you run the embedder script?")
            exit(1)
        except Exception as e:
-        print(f"Error loading cache: {e}")
+            print(f"Error loading knowledge base: {e}")
            exit(1)
    def retrieve_context(self, query, top_k=5):
        print("[1. Retrieving Context...]")
 def saveCache():
        try:
-        with open(CACHE_FILE, "wb") as f:
+            # --- CRITICAL FIX: Use the EXACT same model as the embedder ---
-            pickle.dump(user_cache, f)
+            # Embedder used: "qwen/qwen3-embedding-8b" -> Dimension 4096
            # We must use the same here to avoid shape mismatch.
            response = self.client.embeddings.create(
                model="qwen/qwen3-embedding-8b", 
                input=query
            )
            query_emb = response.data[0].embedding
            # Search the vector store
            context_chunks = self.vector_store.search(query_emb, top_k=top_k)
            return context_chunks
        except Exception as e:
-        print(f"Error saving cache: {e}")
+            print(f"Error retrieving context: {e}")
            return []
    def generate_answer(self, query, context_chunks):
        print("[2. Generating Answer...]")
-def hashPassword(password):
+        # Build the context string
-    return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")
+        context_text = "\n\n---\n\n".join([chunk['metadata']['text'] for chunk in context_chunks])
        system_prompt = """You are a helpful AI assistant specializing in DBT (Dialectical Behavior Therapy). 
 Use the provided context to answer the user's question. 
 If the answer is not in the context, say you don't know based on the provided text.
 Be concise and compassionate."""
-def verifyPassword(password, hashed):
+        user_prompt = f"""Context:
-    return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8"))
+{context_text}
 Question: {query}"""
-def getCachedUser(discord_id):
+        try:
-    return user_cache.get(discord_id)
+            # Using a strong model for the final generation
-
+            response = self.client.chat.completions.create(
-
+                model="openai/gpt-4o-mini", # You can change this to "qwen/qwen-3-8b" or similar if desired
-def setCachedUser(discord_id, user_data):
+                messages=[
-    user_cache[discord_id] = user_data
+                    {"role": "system", "content": system_prompt},
-    saveCache()
+                    {"role": "user", "content": user_prompt}
-
+                ],
-
+                temperature=0.7
 def negotiateToken(discord_id, username, password):
    """Log a Discord user in against the API and refresh their cache entry.

    The credentials are always verified by POSTing them to ``/api/login``.
    The local cache only decides which password hash is stored afterwards:
    when the cached entry matches the supplied username and password its
    existing hash is reused, otherwise the password is hashed afresh.

    Args:
        discord_id: Key under which the user is stored in the cache.
        username: Username sent to the API.
        password: Plain-text password sent to the API.

    Returns:
        ``(token, user_uuid)`` on success, where ``user_uuid`` is the ``sub``
        entry of the decoded token payload; ``(None, None)`` when the API
        does not answer with status 200 and a ``token`` field.
    """
    cached = getCachedUser(discord_id)
    cached_hash = cached.get("hashed_password") if cached else None
    # A cache entry without a stored hash is treated as a miss instead of
    # being handed to verifyPassword, which cannot compare against None.
    cache_hit = bool(
        cached_hash
        and cached.get("username") == username
        and verifyPassword(password, cached_hash)
    )

    result, status = apiRequest(
        "post", "/api/login", data={"username": username, "password": password}
    )
    if status != 200 or "token" not in result:
        return None, None

    token = result["token"]
    user_uuid = decodeJwtPayload(token)["sub"]
    setCachedUser(
        discord_id,
        {
            # Reuse the verified hash on a cache hit; otherwise hash anew.
            "hashed_password": cached_hash if cache_hit else hashPassword(password),
            "user_uuid": user_uuid,
            "username": username,
        },
    )
    return token, user_uuid
 async def handleAuthFailure(message):
    discord_id = message.author.id
    user_sessions.pop(discord_id, None)
    await message.channel.send(
        "Your session has expired. Send any message to log in again."
            )
            return response.choices[0].message.content
-async def handleLoginStep(message):
+        except Exception as e:
-    discord_id = message.author.id
+            return f"Error generating answer: {e}"
    state = login_state[discord_id]
-    if state["step"] == "username":
+    def process_query(self, query):
-        state["username"] = message.content.strip()
+        # 1. Retrieve
-        state["step"] = "password"
+        context = self.retrieve_context(query)
        await message.channel.send("Password?")
-    elif state["step"] == "password":
+        if not context:
-        username = state["username"]
+            return "I couldn't find any relevant information in the knowledge base."
        password = message.content.strip()
        del login_state[discord_id]
-        token, user_uuid = negotiateToken(discord_id, username, password)
+        # Optional: Print sources for debugging
        print(f"   Found {len(context)} relevant chunks (Top score: {context[0]['score']:.4f})")
-        if token and user_uuid:
+        # 2. Generate
-            user_sessions[discord_id] = {
+        answer = self.generate_answer(query, context)
                "token": token,
                "user_uuid": user_uuid,
                "username": username,
            }
            registered = ", ".join(list_registered()) or "none"
            await message.channel.send(
                f"Welcome back **{username}**!\n\n"
                f"Registered modules: {registered}\n\n"
                f"Send 'help' for available commands."
            )
        else:
            await message.channel.send(
                "Invalid credentials. Send any message to try again."
            )
        return answer
-async def sendHelpMessage(message):
+def main():
-    help_msg = """**🤖 Synculous Bot - Natural Language Commands**
+    print("Initializing AI Jury System...")
    system = JurySystem()
-Just talk to me naturally! Here are some examples:
+    print("\nSystem Ready. Ask a question (or type 'exit').")
-**💊 Medications:**
+    while True:
-• "add lsd 50 mcg every tuesday at 4:20pm"
+        try:
-• "take my wellbutrin"
+            user_query = input("\nYou: ").strip()
 • "what meds do i have today?"
 • "show my refills"
 • "snooze my reminder for 30 minutes"
 • "check adherence"
-**📋 Routines:**
+            if user_query.lower() in ['exit', 'quit']:
-• "create morning routine with brush teeth, shower, eat"
+                print("Goodbye!")
-• "start my morning routine"
+                break
 • "done" (complete current step)
 • "skip" (skip current step)
 • "pause/resume" (pause or continue)
 • "what steps are in my routine?"
 • "schedule workout for monday wednesday friday at 7am"
 • "show my stats"
-**💡 Tips:**
+            if not user_query:
-• I understand natural language, typos, and slang
+                continue
 • If I'm unsure, I'll ask for clarification
 • For important actions, I'll ask you to confirm with "yes" or "no"
 • When you're in a routine, shortcuts like "done", "skip", "pause" work automatically"""
    await message.channel.send(help_msg)
            response = system.process_query(user_query)
            print(f"\nAI: {response}")
-async def checkActiveSession(session):
+        except KeyboardInterrupt:
-    """Check if user has an active routine session and return details."""
+            print("\nGoodbye!")
-    token = session.get("token")
+            break
-    if not token:
+        except Exception as e:
-        return None
+            print(f"\nAn error occurred: {e}")
    resp, status = apiRequest("get", "/api/sessions/active", token)
    if status == 200 and "session" in resp:
        return resp
    return None
 async def handleConfirmation(message, session):
    """Resolve the newest pending confirmation from a yes/no style reply.

    Args:
        message: Incoming message; its ``content`` is lower-cased and stripped
            before being compared against the accepted words.
        session: Per-user session dict. Pending confirmations, if any, live
            under ``session["pending_confirmations"]`` keyed by confirmation id.

    Returns:
        True when the reply was an accepted affirmative or negative word and
        the newest pending confirmation was consumed; False when nothing is
        pending or the reply is neither.
    """
    pending = session.get("pending_confirmations")
    if not pending:
        return False

    reply = message.content.lower().strip()
    is_yes = reply in ("yes", "y", "yeah", "sure", "ok", "confirm")
    is_no = reply in ("no", "n", "nah", "cancel", "abort")
    if not (is_yes or is_no):
        return False

    # Dicts preserve insertion order, so the last key is the most recent entry.
    confirmation_id = next(reversed(pending))
    confirmation_data = pending.pop(confirmation_id)

    if is_no:
        await message.channel.send("❌ Cancelled.")
        return True

    handler = get_handler(confirmation_data.get("interaction_type"))
    if handler:
        # Replay the stored command with the confirmation flag cleared --
        # presumably so the handler performs the action instead of asking again.
        replayed = confirmation_data.copy()
        replayed["needs_confirmation"] = False
        await handler(message, session, replayed)
    return True
 async def handleActiveSessionShortcuts(message, session, active_session):
    """Dispatch one-word routine shortcuts such as 'done', 'skip' or 'pause'.

    The whole message (lower-cased, stripped) must equal a shortcut word.
    A match is forwarded to the handler registered for ``"routine"`` as a
    minimal parsed command ``{"action": <action>}``.

    Returns:
        True when a shortcut matched and a handler ran; False otherwise.
    """
    # Shortcut words grouped by the routine action they trigger.
    words_by_action = {
        "complete": ("done", "finished", "complete", "next"),
        "skip": ("skip", "pass"),
        "pause": ("pause", "hold"),
        "resume": ("resume", "continue"),
        "cancel": ("stop", "quit"),
        "abort": ("abort",),
    }
    action_for_word = {
        word: action
        for action, words in words_by_action.items()
        for word in words
    }

    action = action_for_word.get(message.content.lower().strip())
    if action is None:
        return False

    handler = get_handler("routine")
    if not handler:
        return False

    await handler(message, session, {"action": action})
    return True
 async def routeCommand(message):
    """Route a message from a logged-in user to the matching command handler.

    Order of checks: help request, pending yes/no confirmation, active-session
    shortcuts, then AI parsing of the free-form text. The parsed result is
    dispatched to the handler registered for its ``interaction_type``.

    Indexes ``user_sessions`` directly, so the caller must already have
    verified that the author has a session.
    """
    discord_id = message.author.id
    session = user_sessions[discord_id]
    user_input = message.content.lower()
    # Substring match: any message containing "help" shows the help text.
    if "help" in user_input or "what can i say" in user_input:
        await sendHelpMessage(message)
        return
    # Check for active session first
    active_session = await checkActiveSession(session)
    # Handle confirmation responses
    confirmation_handled = await handleConfirmation(message, session)
    if confirmation_handled:
        return
    # Handle shortcuts when in active session
    if active_session:
        shortcut_handled = await handleActiveSessionShortcuts(
            message, session, active_session
        )
        if shortcut_handled:
            return
    # Show the typing indicator while the parser call is in flight.
    async with message.channel.typing():
        history = message_history.get(discord_id, [])
        # Add context about active session to help AI understand
        context = ""
        if active_session:
            session_data = active_session.get("session", {})
            routine_name = session_data.get("routine_name", "a routine")
            # Stored index is incremented by one for display.
            current_step = session_data.get("current_step_index", 0) + 1
            total_steps = active_session.get("total_steps", 0)
            context = f"\n[Context: User is currently in active session for '{routine_name}', on step {current_step} of {total_steps}. They can say 'done', 'skip', 'pause', 'resume', or 'stop'.]"
        parsed = await ai_parser.parse(
            message.content + context, "command_parser", history=history
        )
        # Record the raw message (without the context suffix) with its parse
        # result, keeping only the five most recent exchanges per user.
        if discord_id not in message_history:
            message_history[discord_id] = []
        message_history[discord_id].append((message.content, parsed))
        message_history[discord_id] = message_history[discord_id][-5:]
    # The parser signals ambiguity or failure through these keys.
    if "needs_clarification" in parsed:
        await message.channel.send(
            f"I'm not quite sure what you mean. {parsed['needs_clarification']}"
        )
        return
    if "error" in parsed:
        await message.channel.send(
            f"I had trouble understanding that: {parsed['error']}"
        )
        return
    interaction_type = parsed.get("interaction_type")
    handler = get_handler(interaction_type)
    if handler:
        await handler(message, session, parsed)
    else:
        # No handler registered for this type: tell the user what is available.
        registered = ", ".join(list_registered()) or "none"
        await message.channel.send(
            f"Unknown command type '{interaction_type}'. Registered modules: {registered}"
        )
@client.event
 async def on_ready():
    """Log the bot identity, load the user cache, and start the background loop."""
    print(f"Bot logged in as {client.user}")
    loadCache()
    backgroundLoop.start()
@client.event
 async def on_message(message):
    """Entry point for incoming messages: gate on DM and login state, then route.

    Messages from the bot itself and messages outside direct-message channels
    are ignored. A user in the middle of logging in continues the login flow,
    a user with a session is routed to command handling, and anyone else is
    prompted for a username.
    """
    author = message.author
    if author == client.user or not isinstance(message.channel, discord.DMChannel):
        return

    discord_id = author.id
    if discord_id in login_state:
        await handleLoginStep(message)
    elif discord_id in user_sessions:
        await routeCommand(message)
    else:
        # First contact: open the login flow at the username step.
        login_state[discord_id] = {"step": "username"}
        await message.channel.send("Welcome! Send your username to log in.")
@tasks.loop(seconds=60)
 async def backgroundLoop():
    """Periodic background task; currently a no-op placeholder.

    Override this in your domain module or extend as needed.
    """
    pass
@backgroundLoop.before_loop
 async def beforeBackgroundLoop():
    """Hold the background loop back until the Discord client is ready."""
    await client.wait_until_ready()
 if __name__ == "__main__":
-    client.run(DISCORD_BOT_TOKEN)
+    main()
--- a/bot/commands/knowledge.py
+++ b/bot/commands/knowledge.py
@@ -53,9 +53,24 @@ def load_knowledge_base(
    with open(file_path, "r") as f:
        data = json.load(f)
    # Handle both dict format {"chunks": [...], "embeddings": [...], "metadata": {...}}
    # and legacy list format where data is just the chunks
    if isinstance(data, dict):
        chunks = data.get("chunks", [])
        embeddings = data.get("embeddings", [])
        metadata = data.get("metadata", {})
    elif isinstance(data, list):
        # Legacy format: assume it's just chunks, or list of [chunk, embedding] pairs
        if data and isinstance(data[0], dict) and "text" in data[0]:
            # Format: [{"text": "...", "embedding": [...]}, ...]
            chunks = [item.get("text", "") for item in data]
            embeddings = [item.get("embedding", []) for item in data]
            metadata = {"format": "legacy_list_of_dicts"}
        else:
            # Unknown list format - can't process
            return None
    else:
        return None
    # Add file_path to metadata for reference
    metadata["_file_path"] = file_path
Author	SHA1	Message	Date
chelsea	c7be19611a	Merge branch 'main' of https://git.scorpi.us/chelsea/Synculous-2	2026-02-16 19:08:19 -06:00
chelsea	b1bb05e879	Fix knowledge base loader to handle list format embeddings Handle both dict and list formats in load_knowledge_base function to fix AttributeError	2026-02-16 19:06:31 -06:00