import asyncio
import json
import os

import numpy as np
import discord
from discord.ext import commands
from openai import OpenAI

# --- Configuration ---
CONFIG_PATH = os.getenv("CONFIG_PATH", "config.json")
KNOWLEDGE_BASE_PATH = os.getenv(
    "KNOWLEDGE_BASE_PATH", "bot/data/dbt_knowledge.embeddings.json"
)
DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN")

# Discord rejects messages longer than 2000 characters.
DISCORD_MESSAGE_LIMIT = 2000


class SimpleVectorStore:
    """A simple in-memory vector store using NumPy cosine similarity."""

    def __init__(self):
        self.vectors = []   # embedding vectors, all of the same dimension
        self.metadata = []  # parallel list of per-chunk metadata dicts

    def add(self, vectors, metadata):
        """Append embedding vectors and their parallel metadata entries.

        NOTE(review): the original ``add`` body fell outside the reviewed
        diff hunk; extend-semantics are assumed from how
        ``load_knowledge_base`` calls it — confirm against the pre-diff file.
        """
        self.vectors.extend(vectors)
        self.metadata.extend(metadata)

    def search(self, query_vector, top_k=5):
        """Return the ``top_k`` closest chunks by cosine similarity.

        Each result is ``{"metadata": <chunk dict>, "score": <float>}``,
        ordered best-first. Returns ``[]`` when the store is empty or the
        query vector has zero norm.
        """
        if not self.vectors:
            return []

        query_vec = np.array(query_vector)
        query_norm = np.linalg.norm(query_vec)
        if query_norm == 0:
            # A zero query vector has no direction; the original code
            # divided by this norm unguarded.
            return []

        doc_vecs = np.array(self.vectors)
        norms = np.linalg.norm(doc_vecs, axis=1)

        # Cosine similarity = (A . B) / (||A|| * ||B||); skip zero-norm
        # documents so the division is always well defined.
        scores = np.zeros(len(doc_vecs))
        valid = norms > 0
        dots = np.dot(doc_vecs, query_vec)
        scores[valid] = dots[valid] / (norms[valid] * query_norm)

        top_indices = np.argsort(scores)[-top_k:][::-1]
        return [
            {"metadata": self.metadata[i], "score": scores[i]}
            for i in top_indices
        ]


class JurySystem:
    """RAG pipeline: embed a query, retrieve DBT chunks, generate an answer."""

    def __init__(self):
        self.config = self.load_config()
        # OpenRouter exposes an OpenAI-compatible API surface.
        self.client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=self.config["openrouter_api_key"],
        )
        self.vector_store = SimpleVectorStore()
        self.load_knowledge_base()

    def load_config(self):
        """Load the JSON config file (must contain ``openrouter_api_key``)."""
        with open(CONFIG_PATH, "r") as f:
            return json.load(f)

    def load_knowledge_base(self):
        """Load pre-computed embeddings into the in-memory vector store.

        Re-raises on a missing or malformed knowledge-base file so startup
        fails loudly instead of serving an empty index.
        """
        print(f"Loading knowledge base from {KNOWLEDGE_BASE_PATH}...")
        try:
            with open(KNOWLEDGE_BASE_PATH, "r", encoding="utf-8") as f:
                data = json.load(f)
            vectors = []
            metadata = []
            for item in data:
                vectors.append(item["embedding"])
                metadata.append(
                    {"id": item["id"], "source": item["source"], "text": item["text"]}
                )
            self.vector_store.add(vectors, metadata)
            print(f"Loaded {len(vectors)} chunks into vector store.")
        except FileNotFoundError:
            print(f"Error: {KNOWLEDGE_BASE_PATH} not found.")
            raise
        except Exception as e:
            print(f"Error loading knowledge base: {e}")
            raise

    def process_query(self, query):
        """Answer ``query`` via retrieval + generation; returns a user-facing string.

        This is a synchronous, blocking call (two network round-trips);
        async callers must run it off the event loop.
        """
        try:
            # Must match the embedder's model exactly ("qwen/qwen3-embedding-8b",
            # 4096 dims) or the dot product in search() will shape-mismatch.
            response = self.client.embeddings.create(
                model="qwen/qwen3-embedding-8b", input=query
            )
            query_emb = response.data[0].embedding
            context_chunks = self.vector_store.search(query_emb, top_k=5)

            if not context_chunks:
                return "I couldn't find any relevant information in the knowledge base."

            context_text = "\n\n---\n\n".join(
                chunk["metadata"]["text"] for chunk in context_chunks
            )

            system_prompt = """You are a helpful AI assistant specializing in DBT (Dialectical Behavior Therapy). 
Use the provided context to answer the user's question. 
If the answer is not in the context, say you don't know based on the provided text. 
Be concise and compassionate."""

            user_prompt = f"Context:\n{context_text}\n\nQuestion: {query}"

            response = self.client.chat.completions.create(
                model="openai/gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0.7,
            )
            return response.choices[0].message.content
        except Exception as e:
            # Boundary handler: never crash the Discord handler on API errors.
            return f"Error processing query: {e}"


# Discord bot wiring. Heavy initialization (config, knowledge base, API
# client) happens in main() so importing this module has no side effects.
intents = discord.Intents.default()
intents.message_content = True  # required to read message text
bot = commands.Bot(command_prefix="!", intents=intents)

# Set in main() before the bot connects.
jury_system = None


def _clip(text):
    """Trim a reply to Discord's per-message character limit."""
    if len(text) > DISCORD_MESSAGE_LIMIT:
        return text[: DISCORD_MESSAGE_LIMIT - 3] + "..."
    return text


async def _answer(query):
    """Run the blocking RAG pipeline in a worker thread.

    Without this, every query stalls the event loop (heartbeats, other
    guilds) for the full duration of two API round-trips.
    """
    return await asyncio.to_thread(jury_system.process_query, query)


@bot.event
async def on_ready():
    print(f"Bot logged in as {bot.user}")


@bot.event
async def on_message(message):
    # Ignore our own messages to avoid reply loops.
    if message.author == bot.user:
        return

    # Treat any non-command message as a DBT query.
    if not message.content.startswith("!"):
        async with message.channel.typing():
            response = await _answer(message.content)
            await message.reply(_clip(response))

    # Overriding on_message suppresses default command dispatch; re-run it.
    await bot.process_commands(message)


@bot.command(name="ask")
async def ask_dbt(ctx, *, question):
    """Ask a DBT-related question"""
    async with ctx.typing():
        response = await _answer(question)
        await ctx.send(_clip(response))


def main():
    """Entry point: validate config, build the RAG system, start the bot."""
    global jury_system

    if not DISCORD_BOT_TOKEN:
        # Fail fast with a clear message instead of a cryptic failure
        # from bot.run(None).
        raise SystemExit("DISCORD_BOT_TOKEN environment variable is not set.")

    print("Initializing AI Jury System...")
    jury_system = JurySystem()
    print("Jury System ready!")
    bot.run(DISCORD_BOT_TOKEN)


if __name__ == "__main__":
    main()