Convert JurySystem to Discord bot

This commit is contained in:
2026-02-16 19:26:16 -06:00
parent 09d453017c
commit 2feaf0cdc0

View File

@@ -1,14 +1,21 @@
import asyncio
import json
import os
import time

import discord
import numpy as np
from discord.ext import commands
from openai import OpenAI
# --- Configuration --- # --- Configuration ---
# Every setting is overridable through the environment so deployments can
# relocate files without editing code; the literals are the dev defaults.
CONFIG_PATH = os.getenv("CONFIG_PATH", "config.json")
KNOWLEDGE_BASE_PATH = os.getenv("KNOWLEDGE_BASE_PATH", "bot/data/dbt_knowledge.embeddings.json")
DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN")
class SimpleVectorStore:
    """A simple in-memory vector store using NumPy."""

    def __init__(self):
        # Parallel lists: vectors[i] is the embedding for metadata[i].
        self.vectors = []
        self.metadata = []
@@ -21,175 +28,130 @@ class SimpleVectorStore:
if not self.vectors: if not self.vectors:
return [] return []
# Convert to numpy arrays for efficient math
query_vec = np.array(query_vector) query_vec = np.array(query_vector)
doc_vecs = np.array(self.vectors) doc_vecs = np.array(self.vectors)
# Cosine Similarity: (A . B) / (||A|| * ||B||)
# Note: Both vectors must have the same dimension (e.g., 4096)
norms = np.linalg.norm(doc_vecs, axis=1) norms = np.linalg.norm(doc_vecs, axis=1)
# Avoid division by zero
valid_indices = norms > 0 valid_indices = norms > 0
scores = np.zeros(len(doc_vecs)) scores = np.zeros(len(doc_vecs))
# Calculate dot product
dot_products = np.dot(doc_vecs, query_vec) dot_products = np.dot(doc_vecs, query_vec)
scores[valid_indices] = dot_products[valid_indices] / (
# Calculate cosine similarity only for valid norms norms[valid_indices] * np.linalg.norm(query_vec)
scores[valid_indices] = dot_products[valid_indices] / (norms[valid_indices] * np.linalg.norm(query_vec)) )
# Get top_k indices
top_indices = np.argsort(scores)[-top_k:][::-1] top_indices = np.argsort(scores)[-top_k:][::-1]
results = [] results = []
for idx in top_indices: for idx in top_indices:
results.append({ results.append({"metadata": self.metadata[idx], "score": scores[idx]})
"metadata": self.metadata[idx],
"score": scores[idx]
})
return results return results
class JurySystem:
    """RAG pipeline: embed a query, retrieve DBT chunks, ask an LLM to answer."""

    def __init__(self):
        self.config = self.load_config()
        # OpenRouter speaks the OpenAI wire protocol; only the base URL differs.
        self.client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=self.config["openrouter_api_key"],
        )
        self.vector_store = SimpleVectorStore()
        self.load_knowledge_base()

    def load_config(self):
        """Load the JSON config file; must contain 'openrouter_api_key'."""
        with open(CONFIG_PATH, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_knowledge_base(self):
        """Load pre-computed embeddings into the in-memory vector store.

        Re-raises on a missing or malformed knowledge-base file so startup
        fails fast instead of serving an empty index.
        """
        print(f"Loading knowledge base from {KNOWLEDGE_BASE_PATH}...")
        try:
            with open(KNOWLEDGE_BASE_PATH, "r", encoding="utf-8") as f:
                data = json.load(f)
            vectors = []
            metadata = []
            for item in data:
                vectors.append(item["embedding"])
                metadata.append(
                    {"id": item["id"], "source": item["source"], "text": item["text"]}
                )
            self.vector_store.add(vectors, metadata)
            print(f"Loaded {len(vectors)} chunks into vector store.")
        except FileNotFoundError:
            print(f"Error: {KNOWLEDGE_BASE_PATH} not found.")
            raise
        except Exception as e:
            print(f"Error loading knowledge base: {e}")
            raise

    def _retrieve_context(self, query, top_k=5):
        """Embed the query and return the top_k most similar stored chunks."""
        # Must be the EXACT model the offline embedder used, otherwise the
        # query vector's dimension won't match the stored vectors.
        response = self.client.embeddings.create(
            model="qwen/qwen3-embedding-8b", input=query
        )
        query_emb = response.data[0].embedding
        return self.vector_store.search(query_emb, top_k=top_k)

    def _generate_answer(self, query, context_chunks):
        """Ask the chat model to answer the query, grounded in the chunks."""
        context_text = "\n\n---\n\n".join(
            chunk["metadata"]["text"] for chunk in context_chunks
        )
        system_prompt = """You are a helpful AI assistant specializing in DBT (Dialectical Behavior Therapy).
Use the provided context to answer the user's question.
If the answer is not in the context, say you don't know based on the provided text.
Be concise and compassionate."""
        user_prompt = f"Context:\n{context_text}\n\nQuestion: {query}"
        response = self.client.chat.completions.create(
            model="openai/gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.7,
        )
        return response.choices[0].message.content

    def process_query(self, query):
        """Answer a user query via retrieve-then-generate; never raises.

        Returns the model's answer, a fallback message when no context is
        found, or an error string (callers forward the result to Discord).
        """
        try:
            context_chunks = self._retrieve_context(query)
            if not context_chunks:
                return "I couldn't find any relevant information in the knowledge base."
            return self._generate_answer(query, context_chunks)
        except Exception as e:
            return f"Error processing query: {e}"
# --- Startup: bring the RAG backend up before wiring Discord handlers ---
print("Initializing AI Jury System...")
jury_system = JurySystem()
print("Jury System ready!")
# --- Discord Bot Setup ---
# message_content is needed so on_message can read the text of user messages.
intents = discord.Intents.default()
intents.message_content = True
bot = commands.Bot(command_prefix="!", intents=intents)
@bot.event
async def on_ready():
    """Log a confirmation once the gateway connection is established."""
    print(f"Bot logged in as {bot.user}")
@bot.event
async def on_message(message):
    """Answer any non-command message as a DBT query; still dispatch commands.

    Ignores the bot's own messages to avoid reply loops.
    """
    if message.author == bot.user:
        return
    # Process all messages as DBT queries
    if not message.content.startswith("!"):
        async with message.channel.typing():
            # process_query performs blocking network I/O; run it off the
            # event loop so the bot stays responsive while the LLM answers.
            response = await asyncio.to_thread(
                jury_system.process_query, message.content
            )
            # Discord rejects messages longer than 2000 characters.
            await message.reply(response[:2000])
    await bot.process_commands(message)
@bot.command(name="ask")
async def ask_dbt(ctx, *, question):
    """Ask a DBT-related question"""
    async with ctx.typing():
        # Run the blocking RAG pipeline off the event loop so the bot
        # keeps heartbeating while the LLM call is in flight.
        response = await asyncio.to_thread(jury_system.process_query, question)
        # Discord rejects messages longer than 2000 characters.
        await ctx.send(response[:2000])
bot.run(DISCORD_BOT_TOKEN)