Compare commits

..

2 Commits

Author SHA1 Message Date
c7be19611a Merge branch 'main' of https://git.scorpi.us/chelsea/Synculous-2 2026-02-16 19:08:19 -06:00
b1bb05e879 Fix knowledge base loader to handle list format embeddings
Handle both dict and list formats in load_knowledge_base function to fix AttributeError
2026-02-16 19:06:31 -06:00
2 changed files with 191 additions and 390 deletions

View File

@@ -1,409 +1,195 @@
"""
bot.py - Discord bot client with session management and command routing
Features:
- Login flow with username/password
- Session management with JWT tokens
- AI-powered command parsing via registry
- Background task loop for polling
"""
import discord
from discord.ext import tasks
import os
import sys
import json import json
import base64 import time
import requests import numpy as np
import bcrypt from openai import OpenAI
import pickle
from bot.command_registry import get_handler, list_registered # --- Configuration ---
import ai.parser as ai_parser CONFIG_PATH = 'config.json'
import bot.commands.routines # noqa: F401 - registers handler KNOWLEDGE_BASE_PATH = 'dbt_knowledge.json'
import bot.commands.medications # noqa: F401 - registers handler
import bot.commands.knowledge # noqa: F401 - registers handler
DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN") class SimpleVectorStore:
API_URL = os.getenv("API_URL", "http://app:5000") """A simple in-memory vector store using NumPy."""
def __init__(self):
self.vectors = []
self.metadata = []
user_sessions = {} def add(self, vectors, metadatas):
login_state = {} self.vectors.extend(vectors)
message_history = {} self.metadata.extend(metadatas)
user_cache = {}
CACHE_FILE = "/app/user_cache.pkl"
intents = discord.Intents.default() def search(self, query_vector, top_k=5):
intents.message_content = True if not self.vectors:
return []
client = discord.Client(intents=intents) # Convert to numpy arrays for efficient math
query_vec = np.array(query_vector)
doc_vecs = np.array(self.vectors)
# Cosine Similarity: (A . B) / (||A|| * ||B||)
# Note: Both vectors must have the same dimension (e.g., 4096)
norms = np.linalg.norm(doc_vecs, axis=1)
def decodeJwtPayload(token): # Avoid division by zero
payload = token.split(".")[1] valid_indices = norms > 0
payload += "=" * (4 - len(payload) % 4) scores = np.zeros(len(doc_vecs))
return json.loads(base64.urlsafe_b64decode(payload))
# Calculate dot product
dot_products = np.dot(doc_vecs, query_vec)
def apiRequest(method, endpoint, token=None, data=None): # Calculate cosine similarity only for valid norms
url = f"{API_URL}{endpoint}" scores[valid_indices] = dot_products[valid_indices] / (norms[valid_indices] * np.linalg.norm(query_vec))
headers = {"Content-Type": "application/json"}
if token: # Get top_k indices
headers["Authorization"] = f"Bearer {token}" top_indices = np.argsort(scores)[-top_k:][::-1]
results = []
for idx in top_indices:
results.append({
"metadata": self.metadata[idx],
"score": scores[idx]
})
return results
class JurySystem:
def __init__(self):
    """Load configuration, create the API client, and populate the vector store.

    Reads the config via ``load_config``, builds an ``OpenAI`` client pointed
    at the OpenRouter base URL using the ``openrouter_api_key`` config entry,
    creates an empty ``SimpleVectorStore`` and then calls
    ``load_knowledge_base``.
    """
    settings = self.load_config()
    self.config = settings
    # The OpenAI SDK is used as the transport; only the base URL and key differ.
    self.client = OpenAI(
        api_key=settings['openrouter_api_key'],
        base_url="https://openrouter.ai/api/v1",
    )
    self.vector_store = SimpleVectorStore()
    self.load_knowledge_base()
def load_config(self):
    """Read and return the parsed JSON document stored at ``CONFIG_PATH``.

    The file is opened explicitly as UTF-8 so the result does not depend on
    the platform's locale encoding.

    Raises:
        FileNotFoundError: if ``CONFIG_PATH`` does not exist.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
        return json.load(f)
def load_knowledge_base(self):
"""Loads the pre-computed embeddings from the JSON file."""
print(f"Loading knowledge base from {KNOWLEDGE_BASE_PATH}...")
try: try:
resp = getattr(requests, method)(url, headers=headers, json=data, timeout=10) with open(KNOWLEDGE_BASE_PATH, 'r', encoding='utf-8') as f:
try: data = json.load(f)
return resp.json(), resp.status_code
except ValueError:
return {}, resp.status_code
except requests.RequestException:
return {"error": "API unavailable"}, 503
vectors = []
metadata = []
def loadCache(): for item in data:
try: vectors.append(item['embedding'])
if os.path.exists(CACHE_FILE): metadata.append({
with open(CACHE_FILE, "rb") as f: "id": item['id'],
global user_cache "source": item['source'],
user_cache = pickle.load(f) "text": item['text']
print(f"Loaded cache for {len(user_cache)} users") })
self.vector_store.add(vectors, metadata)
print(f"Loaded {len(vectors)} chunks into vector store.")
except FileNotFoundError:
print(f"Error: {KNOWLEDGE_BASE_PATH} not found. Did you run the embedder script?")
exit(1)
except Exception as e: except Exception as e:
print(f"Error loading cache: {e}") print(f"Error loading knowledge base: {e}")
exit(1)
def retrieve_context(self, query, top_k=5):
print("[1. Retrieving Context...]")
def saveCache():
try: try:
with open(CACHE_FILE, "wb") as f: # --- CRITICAL FIX: Use the EXACT same model as the embedder ---
pickle.dump(user_cache, f) # Embedder used: "qwen/qwen3-embedding-8b" -> Dimension 4096
# We must use the same here to avoid shape mismatch.
response = self.client.embeddings.create(
model="qwen/qwen3-embedding-8b",
input=query
)
query_emb = response.data[0].embedding
# Search the vector store
context_chunks = self.vector_store.search(query_emb, top_k=top_k)
return context_chunks
except Exception as e: except Exception as e:
print(f"Error saving cache: {e}") print(f"Error retrieving context: {e}")
return []
def generate_answer(self, query, context_chunks):
print("[2. Generating Answer...]")
def hashPassword(password): # Build the context string
return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8") context_text = "\n\n---\n\n".join([chunk['metadata']['text'] for chunk in context_chunks])
system_prompt = """You are a helpful AI assistant specializing in DBT (Dialectical Behavior Therapy).
Use the provided context to answer the user's question.
If the answer is not in the context, say you don't know based on the provided text.
Be concise and compassionate."""
def verifyPassword(password, hashed): user_prompt = f"""Context:
return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8")) {context_text}
Question: {query}"""
def getCachedUser(discord_id): try:
return user_cache.get(discord_id) # Using a strong model for the final generation
response = self.client.chat.completions.create(
model="openai/gpt-4o-mini", # You can change this to "qwen/qwen-3-8b" or similar if desired
def setCachedUser(discord_id, user_data): messages=[
user_cache[discord_id] = user_data {"role": "system", "content": system_prompt},
saveCache() {"role": "user", "content": user_prompt}
],
temperature=0.7
def negotiateToken(discord_id, username, password):
    """Log a Discord user in through the API and refresh their cache entry.

    The cached entry for ``discord_id`` (if any) is checked first: when its
    username matches and the password verifies against the stored hash, that
    hash is reused in the refreshed entry; otherwise a new hash is computed
    with ``hashPassword``. In both cases the credentials are posted to
    ``/api/login`` and the cache is only written after a successful login.

    Args:
        discord_id: Key under which the user's cache entry is stored.
        username: Username to send to the login endpoint.
        password: Plain-text password to send to the login endpoint.

    Returns:
        ``(token, user_uuid)`` on success, where ``user_uuid`` is the ``sub``
        claim of the returned token; ``(None, None)`` when the login request
        does not return status 200 with a ``token`` field.
    """
    cached = getCachedUser(discord_id)
    cached_hash = cached.get("hashed_password") if cached else None
    # An entry without a stored hash cannot be verified; treat it as a cache
    # miss instead of passing None into verifyPassword (which calls .encode).
    cache_hit = bool(
        cached_hash
        and cached.get("username") == username
        and verifyPassword(password, cached_hash)
    )

    result, status = apiRequest(
        "post", "/api/login", data={"username": username, "password": password}
    )
    if status != 200 or "token" not in result:
        return None, None

    token = result["token"]
    user_uuid = decodeJwtPayload(token)["sub"]
    # Reuse the already-verified hash on a cache hit; hash afresh otherwise.
    hashed_password = cached_hash if cache_hit else hashPassword(password)
    setCachedUser(
        discord_id,
        {
            "hashed_password": hashed_password,
            "user_uuid": user_uuid,
            "username": username,
        },
    )
    return token, user_uuid
async def handleAuthFailure(message):
discord_id = message.author.id
user_sessions.pop(discord_id, None)
await message.channel.send(
"Your session has expired. Send any message to log in again."
) )
return response.choices[0].message.content
async def handleLoginStep(message): except Exception as e:
discord_id = message.author.id return f"Error generating answer: {e}"
state = login_state[discord_id]
if state["step"] == "username": def process_query(self, query):
state["username"] = message.content.strip() # 1. Retrieve
state["step"] = "password" context = self.retrieve_context(query)
await message.channel.send("Password?")
elif state["step"] == "password": if not context:
username = state["username"] return "I couldn't find any relevant information in the knowledge base."
password = message.content.strip()
del login_state[discord_id]
token, user_uuid = negotiateToken(discord_id, username, password) # Optional: Print sources for debugging
print(f" Found {len(context)} relevant chunks (Top score: {context[0]['score']:.4f})")
if token and user_uuid: # 2. Generate
user_sessions[discord_id] = { answer = self.generate_answer(query, context)
"token": token,
"user_uuid": user_uuid,
"username": username,
}
registered = ", ".join(list_registered()) or "none"
await message.channel.send(
f"Welcome back **{username}**!\n\n"
f"Registered modules: {registered}\n\n"
f"Send 'help' for available commands."
)
else:
await message.channel.send(
"Invalid credentials. Send any message to try again."
)
return answer
async def sendHelpMessage(message): def main():
help_msg = """**🤖 Synculous Bot - Natural Language Commands** print("Initializing AI Jury System...")
system = JurySystem()
Just talk to me naturally! Here are some examples: print("\nSystem Ready. Ask a question (or type 'exit').")
**💊 Medications:** while True:
"add lsd 50 mcg every tuesday at 4:20pm" try:
"take my wellbutrin" user_query = input("\nYou: ").strip()
"what meds do i have today?"
"show my refills"
"snooze my reminder for 30 minutes"
"check adherence"
**📋 Routines:** if user_query.lower() in ['exit', 'quit']:
"create morning routine with brush teeth, shower, eat" print("Goodbye!")
"start my morning routine" break
"done" (complete current step)
"skip" (skip current step)
"pause/resume" (pause or continue)
"what steps are in my routine?"
"schedule workout for monday wednesday friday at 7am"
"show my stats"
**💡 Tips:** if not user_query:
• I understand natural language, typos, and slang continue
• If I'm unsure, I'll ask for clarification
• For important actions, I'll ask you to confirm with "yes" or "no"
• When you're in a routine, shortcuts like "done", "skip", "pause" work automatically"""
await message.channel.send(help_msg)
response = system.process_query(user_query)
print(f"\nAI: {response}")
async def checkActiveSession(session): except KeyboardInterrupt:
"""Check if user has an active routine session and return details.""" print("\nGoodbye!")
token = session.get("token") break
if not token: except Exception as e:
return None print(f"\nAn error occurred: {e}")
resp, status = apiRequest("get", "/api/sessions/active", token)
if status == 200 and "session" in resp:
return resp
return None
# Resolve a pending confirmation stored on the session from a yes/no style reply.
# NOTE(review): this view of the file has lost its indentation, so the nesting
# described in the comments below is inferred and should be confirmed.
async def handleConfirmation(message, session):
"""Handle yes/no confirmation responses. Returns True if handled."""
# NOTE(review): discord_id is assigned but not read anywhere in this function.
discord_id = message.author.id
# Replies are compared case-insensitively and with surrounding whitespace removed.
user_input = message.content.lower().strip()
if "pending_confirmations" not in session:
return False
# Check for any pending confirmations
pending = session["pending_confirmations"]
if not pending:
return False
# Get the most recent pending confirmation
# (the last key in the mapping's iteration order)
confirmation_id = list(pending.keys())[-1]
confirmation_data = pending[confirmation_id]
if user_input in ("yes", "y", "yeah", "sure", "ok", "confirm"):
# Execute the confirmed action
# The entry is removed before the handler is looked up or awaited.
del pending[confirmation_id]
interaction_type = confirmation_data.get("interaction_type")
handler = get_handler(interaction_type)
if handler:
# Create a fake parsed object for the handler
# (a shallow copy of the stored data with needs_confirmation cleared)
fake_parsed = confirmation_data.copy()
fake_parsed["needs_confirmation"] = False
await handler(message, session, fake_parsed)
# NOTE(review): cannot tell from here whether this return is inside `if handler:`.
return True
elif user_input in ("no", "n", "nah", "cancel", "abort"):
# A negative reply discards the pending entry and tells the user.
del pending[confirmation_id]
await message.channel.send("❌ Cancelled.")
return True
# Any other text leaves the pending confirmation untouched.
return False
# Translate single-word replies into routine actions and dispatch them to the
# handler registered for the mapped interaction type.
# NOTE(review): this view of the file has lost its indentation; nesting is inferred.
# NOTE(review): the active_session parameter is not read in this function body.
async def handleActiveSessionShortcuts(message, session, active_session):
"""Handle shortcuts like 'done', 'skip', 'next' when in active session."""
# The whole message (lower-cased, stripped) must equal a shortcut key.
user_input = message.content.lower().strip()
# Map common shortcuts to actions
# Each value is an (interaction_type, action) pair.
shortcuts = {
"done": ("routine", "complete"),
"finished": ("routine", "complete"),
"complete": ("routine", "complete"),
"next": ("routine", "complete"),
"skip": ("routine", "skip"),
"pass": ("routine", "skip"),
"pause": ("routine", "pause"),
"hold": ("routine", "pause"),
"resume": ("routine", "resume"),
"continue": ("routine", "resume"),
"stop": ("routine", "cancel"),
"quit": ("routine", "cancel"),
# NOTE(review): "abort" maps to its own action rather than "cancel" — confirm intended.
"abort": ("routine", "abort"),
}
if user_input in shortcuts:
interaction_type, action = shortcuts[user_input]
handler = get_handler(interaction_type)
if handler:
# The handler receives a minimal parsed dict carrying only the action.
fake_parsed = {"action": action}
await handler(message, session, fake_parsed)
# NOTE(review): cannot tell from here whether this return is inside `if handler:`.
return True
return False
# Route one message from a logged-in user: help text, pending confirmations,
# active-session shortcuts, and finally AI parsing plus handler dispatch.
# NOTE(review): this view of the file has lost its indentation, so how much of
# the code below runs inside the `async with ... typing()` block is unknown.
async def routeCommand(message):
discord_id = message.author.id
# Indexing raises KeyError when the user has no session; on_message only
# calls this after checking discord_id is in user_sessions.
session = user_sessions[discord_id]
user_input = message.content.lower()
# Substring match: any message containing "help" gets the help text.
if "help" in user_input or "what can i say" in user_input:
await sendHelpMessage(message)
return
# Check for active session first
active_session = await checkActiveSession(session)
# Handle confirmation responses
confirmation_handled = await handleConfirmation(message, session)
if confirmation_handled:
return
# Handle shortcuts when in active session
if active_session:
shortcut_handled = await handleActiveSessionShortcuts(
message, session, active_session
)
if shortcut_handled:
return
async with message.channel.typing():
history = message_history.get(discord_id, [])
# Add context about active session to help AI understand
context = ""
if active_session:
session_data = active_session.get("session", {})
routine_name = session_data.get("routine_name", "a routine")
# presumably converts a 0-based index into a 1-based step number — TODO confirm
current_step = session_data.get("current_step_index", 0) + 1
# total_steps is read from the top-level response, not from session_data.
total_steps = active_session.get("total_steps", 0)
context = f"\n[Context: User is currently in active session for '{routine_name}', on step {current_step} of {total_steps}. They can say 'done', 'skip', 'pause', 'resume', or 'stop'.]"
# The context note is appended to the text sent to the parser only.
parsed = await ai_parser.parse(
message.content + context, "command_parser", history=history
)
if discord_id not in message_history:
message_history[discord_id] = []
# History stores the raw message (without the context note) and its parse,
# trimmed to the five most recent entries.
message_history[discord_id].append((message.content, parsed))
message_history[discord_id] = message_history[discord_id][-5:]
# assumes parsed is a dict-like object supporting `in` and .get — TODO confirm
if "needs_clarification" in parsed:
await message.channel.send(
f"I'm not quite sure what you mean. {parsed['needs_clarification']}"
)
return
if "error" in parsed:
await message.channel.send(
f"I had trouble understanding that: {parsed['error']}"
)
return
interaction_type = parsed.get("interaction_type")
handler = get_handler(interaction_type)
if handler:
await handler(message, session, parsed)
else:
# No handler registered for this type: report it with the known modules.
registered = ", ".join(list_registered()) or "none"
await message.channel.send(
f"Unknown command type '{interaction_type}'. Registered modules: {registered}"
)
@client.event
async def on_ready():
    """Announce the login, load the user cache, and start the background loop.

    discord.py does not guarantee that this event fires only once (it can be
    dispatched again after a reconnect), so the loop is started only when it
    is not already running; calling ``start()`` on a running loop raises
    ``RuntimeError``.
    """
    print(f"Bot logged in as {client.user}")
    loadCache()
    if not backgroundLoop.is_running():
        backgroundLoop.start()
@client.event
async def on_message(message):
    """Dispatch a direct message to the login flow or to command routing.

    Messages written by the bot itself and messages outside a DM channel are
    ignored. A sender with a login in progress continues that flow; a sender
    with no session is put at the ``username`` step and prompted; everyone
    else is passed to ``routeCommand``.
    """
    from_self = message.author == client.user
    if from_self or not isinstance(message.channel, discord.DMChannel):
        return

    sender_id = message.author.id
    if sender_id in login_state:
        await handleLoginStep(message)
    elif sender_id not in user_sessions:
        login_state[sender_id] = {"step": "username"}
        await message.channel.send("Welcome! Send your username to log in.")
    else:
        await routeCommand(message)
@tasks.loop(seconds=60)
async def backgroundLoop():
    """Placeholder task scheduled with a 60-second interval.

    The body intentionally does nothing; replace or extend it in a domain
    module when periodic work is needed.
    """
    return None
@backgroundLoop.before_loop
async def beforeBackgroundLoop():
    """Registered as the loop's ``before_loop`` hook; waits for the client to be ready."""
    await client.wait_until_ready()
if __name__ == "__main__": if __name__ == "__main__":
client.run(DISCORD_BOT_TOKEN) main()

View File

@@ -53,9 +53,24 @@ def load_knowledge_base(
with open(file_path, "r") as f: with open(file_path, "r") as f:
data = json.load(f) data = json.load(f)
# Handle both dict format {"chunks": [...], "embeddings": [...], "metadata": {...}}
# and legacy list format where data is just the chunks
if isinstance(data, dict):
chunks = data.get("chunks", []) chunks = data.get("chunks", [])
embeddings = data.get("embeddings", []) embeddings = data.get("embeddings", [])
metadata = data.get("metadata", {}) metadata = data.get("metadata", {})
elif isinstance(data, list):
# Legacy format: assume it's just chunks, or list of [chunk, embedding] pairs
if data and isinstance(data[0], dict) and "text" in data[0]:
# Format: [{"text": "...", "embedding": [...]}, ...]
chunks = [item.get("text", "") for item in data]
embeddings = [item.get("embedding", []) for item in data]
metadata = {"format": "legacy_list_of_dicts"}
else:
# Unknown list format - can't process
return None
else:
return None
# Add file_path to metadata for reference # Add file_path to metadata for reference
metadata["_file_path"] = file_path metadata["_file_path"] = file_path