Convert JurySystem to Discord bot
This commit is contained in:

bot/bot.py — 202 changed lines
@@ -1,14 +1,21 @@
|
||||
import asyncio
import json
import os
import time

import discord
import numpy as np
from discord.ext import commands
from openai import OpenAI
|
||||
|
||||
# --- Configuration ---
# Paths are overridable via environment variables so deployments can point at
# their own config and knowledge base without editing code.
CONFIG_PATH = os.getenv("CONFIG_PATH", "config.json")
KNOWLEDGE_BASE_PATH = os.getenv(
    "KNOWLEDGE_BASE_PATH", "bot/data/dbt_knowledge.embeddings.json"
)
# Discord token is secret: environment-only, no file fallback.
DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN")
|
||||
|
||||
|
||||
class SimpleVectorStore:
    """A simple in-memory vector store using NumPy.

    Embeddings and their metadata are kept in two parallel lists;
    `search` ranks stored vectors by cosine similarity to a query vector.
    """

    def __init__(self):
        # Parallel lists: vectors[i] is the embedding for metadata[i].
        self.vectors = []
        self.metadata = []

    # NOTE(review): add() fell outside the visible diff hunk; reconstructed
    # from its call sites (`add(vectors, metadata)`) — confirm against the
    # original file.
    def add(self, vectors, metadata):
        """Append embeddings and their matching metadata records.

        Both sequences must have the same length; items stay paired by index.
        """
        self.vectors.extend(vectors)
        self.metadata.extend(metadata)

    def search(self, query_vector, top_k=5):
        """Return up to top_k stored items most similar to query_vector.

        Results are dicts with "metadata" and "score" keys, ordered from
        most to least similar. Returns [] when the store is empty.
        """
        if not self.vectors:
            return []

        # Convert to numpy arrays for efficient math.
        query_vec = np.array(query_vector)
        doc_vecs = np.array(self.vectors)

        # Cosine Similarity: (A . B) / (||A|| * ||B||)
        # Note: query and documents must share the same dimension (e.g. 4096).
        norms = np.linalg.norm(doc_vecs, axis=1)
        query_norm = np.linalg.norm(query_vec)

        scores = np.zeros(len(doc_vecs))
        if query_norm > 0:
            # Guard both factors of the denominator: skip zero-norm stored
            # vectors, and leave all scores at 0 for a zero-norm query
            # (the original divided by ||query|| unconditionally -> NaN).
            valid = norms > 0
            dot_products = np.dot(doc_vecs, query_vec)
            scores[valid] = dot_products[valid] / (norms[valid] * query_norm)

        # argsort is ascending: take the last top_k, reverse for descending.
        top_indices = np.argsort(scores)[-top_k:][::-1]

        return [
            {"metadata": self.metadata[idx], "score": scores[idx]}
            for idx in top_indices
        ]
|
||||
|
||||
|
||||
class JurySystem:
    """Retrieval-augmented DBT assistant.

    Embeds a user query, retrieves the closest knowledge-base chunks from an
    in-memory vector store, and asks a chat model to answer from that context.
    """

    def __init__(self):
        self.config = self.load_config()

        # OpenRouter exposes an OpenAI-compatible API; only the base_url
        # and key differ from a stock OpenAI client.
        self.client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=self.config["openrouter_api_key"],
        )

        self.vector_store = SimpleVectorStore()
        self.load_knowledge_base()

    def load_config(self):
        """Load and return the JSON configuration from CONFIG_PATH."""
        with open(CONFIG_PATH, "r") as f:
            return json.load(f)

    def load_knowledge_base(self):
        """Loads the pre-computed embeddings from the JSON file.

        Re-raises on a missing or malformed file so startup fails fast
        instead of silently serving an empty index.
        """
        print(f"Loading knowledge base from {KNOWLEDGE_BASE_PATH}...")
        try:
            with open(KNOWLEDGE_BASE_PATH, "r", encoding="utf-8") as f:
                data = json.load(f)

            vectors = []
            metadata = []
            for item in data:
                vectors.append(item["embedding"])
                metadata.append(
                    {"id": item["id"], "source": item["source"], "text": item["text"]}
                )
            self.vector_store.add(vectors, metadata)
            print(f"Loaded {len(vectors)} chunks into vector store.")

        except FileNotFoundError:
            print(f"Error: {KNOWLEDGE_BASE_PATH} not found.")
            raise
        except Exception as e:
            print(f"Error loading knowledge base: {e}")
            raise

    def process_query(self, query):
        """Answer `query` via retrieve-then-generate.

        Returns the model's answer string, a fixed message when no context is
        found, or a human-readable error string on failure (never raises).
        """
        try:
            # CRITICAL: must use the EXACT model the embedder used
            # ("qwen/qwen3-embedding-8b", dimension 4096) or the query
            # embedding's shape won't match the stored vectors.
            response = self.client.embeddings.create(
                model="qwen/qwen3-embedding-8b", input=query
            )
            query_emb = response.data[0].embedding

            # Retrieve the closest chunks from the in-memory store.
            context_chunks = self.vector_store.search(query_emb, top_k=5)
            if not context_chunks:
                return "I couldn't find any relevant information in the knowledge base."

            context_text = "\n\n---\n\n".join(
                [chunk["metadata"]["text"] for chunk in context_chunks]
            )

            system_prompt = """You are a helpful AI assistant specializing in DBT (Dialectical Behavior Therapy).
Use the provided context to answer the user's question.
If the answer is not in the context, say you don't know based on the provided text.
Be concise and compassionate."""

            user_prompt = f"Context:\n{context_text}\n\nQuestion: {query}"

            response = self.client.chat.completions.create(
                model="openai/gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0.7,
            )

            return response.choices[0].message.content

        except Exception as e:
            return f"Error processing query: {e}"
|
||||
|
||||
def main():
    """Legacy interactive CLI loop.

    Superseded by the Discord bot at the bottom of this file; kept for
    local debugging of the RAG pipeline without a Discord token.
    """
    print("Initializing AI Jury System...")
    system = JurySystem()

    print("\nSystem Ready. Ask a question (or type 'exit').")

    while True:
        try:
            user_query = input("\nYou: ").strip()

            if user_query.lower() in ["exit", "quit"]:
                print("Goodbye!")
                break

            # Ignore empty input instead of sending a blank query.
            if not user_query:
                continue

            response = system.process_query(user_query)
            print(f"\nAI: {response}")

        except KeyboardInterrupt:
            # Ctrl-C exits cleanly rather than dumping a traceback.
            print("\nGoodbye!")
            break
        except Exception as e:
            # Keep the REPL alive on unexpected errors.
            print(f"\nAn error occurred: {e}")
|
||||
# Initialize the Jury System at import time so the Discord event handlers
# below can answer queries as soon as the gateway connects.
# NOTE: the old `if __name__ == "__main__": main()` CLI entry point is
# removed — it would block here and bot.run() at the bottom of the file
# would never be reached.
print("Initializing AI Jury System...")
jury_system = JurySystem()
print("Jury System ready!")
|
||||
# Discord Bot Setup
intents = discord.Intents.default()
# message_content is a privileged intent: it must also be enabled in the
# Discord developer portal or the bot cannot read message text.
intents.message_content = True
bot = commands.Bot(command_prefix="!", intents=intents)
|
||||
|
||||
|
||||
@bot.event
async def on_ready():
    """Log once the gateway connection is established."""
    print(f"Bot logged in as {bot.user}")
|
||||
|
||||
|
||||
@bot.event
async def on_message(message):
    """Treat every non-command message as a DBT query and reply inline."""
    # Never respond to our own messages (avoids reply loops).
    if message.author == bot.user:
        return

    # Process all messages as DBT queries unless they look like a "!" command.
    if not message.content.startswith("!"):
        async with message.channel.typing():
            # process_query does blocking network I/O; run it in a worker
            # thread so the event loop (heartbeats, other guilds) stays live.
            response = await asyncio.to_thread(
                jury_system.process_query, message.content
            )
            await message.reply(response)

    # Overriding on_message suppresses default command dispatch, so hand the
    # message on explicitly (e.g. for !ask).
    await bot.process_commands(message)
|
||||
|
||||
|
||||
@bot.command(name="ask")
async def ask_dbt(ctx, *, question):
    """Ask a DBT-related question"""
    async with ctx.typing():
        # Off-load the blocking RAG pipeline so the event loop isn't stalled.
        response = await asyncio.to_thread(jury_system.process_query, question)
        await ctx.send(response)
|
||||
|
||||
|
||||
# Fail fast with a clear message instead of discord.py's opaque login error
# when the token environment variable is missing.
if not DISCORD_BOT_TOKEN:
    raise SystemExit("DISCORD_BOT_TOKEN environment variable is not set.")
bot.run(DISCORD_BOT_TOKEN)
|
||||
|
||||
Reference in New Issue
Block a user