Convert JurySystem to Discord bot

This commit is contained in:
2026-02-16 19:26:16 -06:00
parent 09d453017c
commit 2feaf0cdc0

View File

@@ -1,14 +1,21 @@
import asyncio
import json
import os
import time

import discord
import numpy as np
from discord.ext import commands
from openai import OpenAI
# --- Configuration ---
# All paths/secrets are read from the environment so deployments can
# override them; the literals are the local-development defaults.
CONFIG_PATH = os.getenv("CONFIG_PATH", "config.json")
KNOWLEDGE_BASE_PATH = os.getenv(
    "KNOWLEDGE_BASE_PATH", "bot/data/dbt_knowledge.embeddings.json"
)
# No default: the bot cannot run without a real Discord token.
DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN")
class SimpleVectorStore:
    """A simple in-memory vector store using NumPy."""

    def __init__(self):
        # Parallel lists: vectors[i] is the embedding for metadata[i].
        self.vectors = []
        self.metadata = []
@@ -21,175 +28,130 @@ class SimpleVectorStore:
if not self.vectors: if not self.vectors:
return [] return []
# Convert to numpy arrays for efficient math
query_vec = np.array(query_vector) query_vec = np.array(query_vector)
doc_vecs = np.array(self.vectors) doc_vecs = np.array(self.vectors)
# Cosine Similarity: (A . B) / (||A|| * ||B||)
# Note: Both vectors must have the same dimension (e.g., 4096)
norms = np.linalg.norm(doc_vecs, axis=1) norms = np.linalg.norm(doc_vecs, axis=1)
# Avoid division by zero
valid_indices = norms > 0 valid_indices = norms > 0
scores = np.zeros(len(doc_vecs)) scores = np.zeros(len(doc_vecs))
# Calculate dot product
dot_products = np.dot(doc_vecs, query_vec) dot_products = np.dot(doc_vecs, query_vec)
scores[valid_indices] = dot_products[valid_indices] / (
# Calculate cosine similarity only for valid norms norms[valid_indices] * np.linalg.norm(query_vec)
scores[valid_indices] = dot_products[valid_indices] / (norms[valid_indices] * np.linalg.norm(query_vec)) )
# Get top_k indices
top_indices = np.argsort(scores)[-top_k:][::-1] top_indices = np.argsort(scores)[-top_k:][::-1]
results = [] results = []
for idx in top_indices: for idx in top_indices:
results.append({ results.append({"metadata": self.metadata[idx], "score": scores[idx]})
"metadata": self.metadata[idx],
"score": scores[idx]
})
return results return results
class JurySystem:
    """Retrieval-augmented DBT assistant.

    Embeds a user query, retrieves the most similar chunks from the
    pre-computed knowledge base, and asks an LLM (via OpenRouter's
    OpenAI-compatible API) to answer using only that context.
    """

    def __init__(self):
        self.config = self.load_config()
        # OpenRouter exposes an OpenAI-compatible surface, so the stock
        # OpenAI client works with only a different base_url.
        self.client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=self.config["openrouter_api_key"],
        )
        self.vector_store = SimpleVectorStore()
        self.load_knowledge_base()

    def load_config(self):
        """Return the parsed JSON config; must contain 'openrouter_api_key'."""
        # Explicit encoding avoids platform-dependent default codecs.
        with open(CONFIG_PATH, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_knowledge_base(self):
        """Load pre-computed embeddings from KNOWLEDGE_BASE_PATH into the store.

        Raises:
            FileNotFoundError: if the embeddings file is missing (re-raised so
                startup fails fast instead of serving empty answers).
            Exception: any other load/parse failure, after logging it.
        """
        print(f"Loading knowledge base from {KNOWLEDGE_BASE_PATH}...")
        try:
            with open(KNOWLEDGE_BASE_PATH, "r", encoding="utf-8") as f:
                data = json.load(f)
            vectors = []
            metadata = []
            for item in data:
                vectors.append(item["embedding"])
                metadata.append(
                    {"id": item["id"], "source": item["source"], "text": item["text"]}
                )
            self.vector_store.add(vectors, metadata)
            print(f"Loaded {len(vectors)} chunks into vector store.")
        except FileNotFoundError:
            print(f"Error: {KNOWLEDGE_BASE_PATH} not found.")
            raise
        except Exception as e:
            print(f"Error loading knowledge base: {e}")
            raise

    def process_query(self, query, top_k=5):
        """Answer *query* with retrieval-augmented generation.

        Args:
            query: the user's question, as plain text.
            top_k: number of knowledge chunks to retrieve (default 5,
                matching the previously hard-coded value).

        Returns:
            The model's answer, or a human-readable error/fallback string.
            This is the bot's top-level boundary, so it never raises.
        """
        try:
            # Must be the SAME model the offline embedder used, so query and
            # document vectors have matching dimensionality.
            response = self.client.embeddings.create(
                model="qwen/qwen3-embedding-8b", input=query
            )
            query_emb = response.data[0].embedding
            context_chunks = self.vector_store.search(query_emb, top_k=top_k)
            if not context_chunks:
                return "I couldn't find any relevant information in the knowledge base."
            context_text = "\n\n---\n\n".join(
                chunk["metadata"]["text"] for chunk in context_chunks
            )
            system_prompt = """You are a helpful AI assistant specializing in DBT (Dialectical Behavior Therapy).
Use the provided context to answer the user's question.
If the answer is not in the context, say you don't know based on the provided text.
Be concise and compassionate."""
            user_prompt = f"Context:\n{context_text}\n\nQuestion: {query}"
            response = self.client.chat.completions.create(
                model="openai/gpt-4o-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0.7,
            )
            return response.choices[0].message.content
        except Exception as e:
            return f"Error processing query: {e}"
# Build the shared RAG engine once at import time; every Discord event
# handler below reuses the same in-memory vector store and API client.
print("Initializing AI Jury System...")
jury_system = JurySystem()
print("Jury System ready!")

# Discord bot setup. message_content is a privileged intent and must also
# be enabled in the Discord developer portal for the bot to read messages.
intents = discord.Intents.default()
intents.message_content = True
bot = commands.Bot(command_prefix="!", intents=intents)
@bot.event
async def on_ready():
    """Log a confirmation once the gateway connection is established."""
    print(f"Bot logged in as {bot.user}")
@bot.event
async def on_message(message):
    """Answer any plain (non-command) message as a DBT question.

    Messages starting with the command prefix are left to the command
    framework via process_commands().
    """
    # Ignore ourselves AND other bots; checking only bot.user still allows
    # two bots to reply to each other in an infinite loop.
    if message.author.bot:
        return
    if not message.content.startswith("!"):
        async with message.channel.typing():
            # process_query does blocking network I/O (embeddings + chat
            # completion); run it in a worker thread so the event loop
            # stays responsive to other Discord events.
            response = await asyncio.to_thread(
                jury_system.process_query, message.content
            )
            await message.reply(response)
    # Required when on_message is overridden, otherwise !commands never run.
    await bot.process_commands(message)
@bot.command(name="ask")
async def ask_dbt(ctx, *, question):
    """Ask a DBT-related question"""
    async with ctx.typing():
        # Run the blocking RAG pipeline off the event loop so one slow
        # OpenRouter call cannot freeze the whole bot.
        response = await asyncio.to_thread(jury_system.process_query, question)
        await ctx.send(response)
if __name__ == "__main__":
    # Fail with a clear message instead of the opaque error bot.run(None)
    # would produce when the environment variable is missing.
    if not DISCORD_BOT_TOKEN:
        raise SystemExit("DISCORD_BOT_TOKEN environment variable is not set.")
    bot.run(DISCORD_BOT_TOKEN)