Compare commits

..

2 Commits

Author SHA1 Message Date
c7be19611a Merge branch 'main' of https://git.scorpi.us/chelsea/Synculous-2 2026-02-16 19:08:19 -06:00
b1bb05e879 Fix knowledge base loader to handle list format embeddings
Handle both dict and list formats in load_knowledge_base function to fix AttributeError
2026-02-16 19:06:31 -06:00
2 changed files with 191 additions and 390 deletions

View File

@@ -1,409 +1,195 @@
"""
bot.py - Discord bot client with session management and command routing
Features:
- Login flow with username/password
- Session management with JWT tokens
- AI-powered command parsing via registry
- Background task loop for polling
"""
import discord
from discord.ext import tasks
import os
import sys
import json import json
import base64 import time
import requests import numpy as np
import bcrypt from openai import OpenAI
import pickle
from bot.command_registry import get_handler, list_registered # --- Configuration ---
import ai.parser as ai_parser CONFIG_PATH = 'config.json'
import bot.commands.routines # noqa: F401 - registers handler KNOWLEDGE_BASE_PATH = 'dbt_knowledge.json'
import bot.commands.medications # noqa: F401 - registers handler
import bot.commands.knowledge # noqa: F401 - registers handler
DISCORD_BOT_TOKEN = os.getenv("DISCORD_BOT_TOKEN") class SimpleVectorStore:
API_URL = os.getenv("API_URL", "http://app:5000") """A simple in-memory vector store using NumPy."""
def __init__(self):
self.vectors = []
self.metadata = []
user_sessions = {} def add(self, vectors, metadatas):
login_state = {} self.vectors.extend(vectors)
message_history = {} self.metadata.extend(metadatas)
user_cache = {}
CACHE_FILE = "/app/user_cache.pkl"
intents = discord.Intents.default() def search(self, query_vector, top_k=5):
intents.message_content = True if not self.vectors:
return []
client = discord.Client(intents=intents) # Convert to numpy arrays for efficient math
query_vec = np.array(query_vector)
doc_vecs = np.array(self.vectors)
# Cosine Similarity: (A . B) / (||A|| * ||B||)
# Note: Both vectors must have the same dimension (e.g., 4096)
norms = np.linalg.norm(doc_vecs, axis=1)
def decodeJwtPayload(token): # Avoid division by zero
payload = token.split(".")[1] valid_indices = norms > 0
payload += "=" * (4 - len(payload) % 4) scores = np.zeros(len(doc_vecs))
return json.loads(base64.urlsafe_b64decode(payload))
# Calculate dot product
dot_products = np.dot(doc_vecs, query_vec)
def apiRequest(method, endpoint, token=None, data=None): # Calculate cosine similarity only for valid norms
url = f"{API_URL}{endpoint}" scores[valid_indices] = dot_products[valid_indices] / (norms[valid_indices] * np.linalg.norm(query_vec))
headers = {"Content-Type": "application/json"}
if token: # Get top_k indices
headers["Authorization"] = f"Bearer {token}" top_indices = np.argsort(scores)[-top_k:][::-1]
try:
resp = getattr(requests, method)(url, headers=headers, json=data, timeout=10) results = []
for idx in top_indices:
results.append({
"metadata": self.metadata[idx],
"score": scores[idx]
})
return results
class JurySystem:
def __init__(self):
self.config = self.load_config()
# Initialize OpenRouter Client
self.client = OpenAI(
base_url="https://openrouter.ai/api/v1",
api_key=self.config['openrouter_api_key']
)
self.vector_store = SimpleVectorStore()
self.load_knowledge_base()
def load_config(self):
with open(CONFIG_PATH, 'r') as f:
return json.load(f)
def load_knowledge_base(self):
"""Loads the pre-computed embeddings from the JSON file."""
print(f"Loading knowledge base from {KNOWLEDGE_BASE_PATH}...")
try: try:
return resp.json(), resp.status_code with open(KNOWLEDGE_BASE_PATH, 'r', encoding='utf-8') as f:
except ValueError: data = json.load(f)
return {}, resp.status_code
except requests.RequestException:
return {"error": "API unavailable"}, 503
vectors = []
metadata = []
def loadCache(): for item in data:
try: vectors.append(item['embedding'])
if os.path.exists(CACHE_FILE): metadata.append({
with open(CACHE_FILE, "rb") as f: "id": item['id'],
global user_cache "source": item['source'],
user_cache = pickle.load(f) "text": item['text']
print(f"Loaded cache for {len(user_cache)} users") })
except Exception as e:
print(f"Error loading cache: {e}")
self.vector_store.add(vectors, metadata)
print(f"Loaded {len(vectors)} chunks into vector store.")
def saveCache(): except FileNotFoundError:
try: print(f"Error: {KNOWLEDGE_BASE_PATH} not found. Did you run the embedder script?")
with open(CACHE_FILE, "wb") as f: exit(1)
pickle.dump(user_cache, f) except Exception as e:
except Exception as e: print(f"Error loading knowledge base: {e}")
print(f"Error saving cache: {e}") exit(1)
def retrieve_context(self, query, top_k=5):
print("[1. Retrieving Context...]")
def hashPassword(password): try:
return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8") # --- CRITICAL FIX: Use the EXACT same model as the embedder ---
# Embedder used: "qwen/qwen3-embedding-8b" -> Dimension 4096
# We must use the same here to avoid shape mismatch.
def verifyPassword(password, hashed): response = self.client.embeddings.create(
return bcrypt.checkpw(password.encode("utf-8"), hashed.encode("utf-8")) model="qwen/qwen3-embedding-8b",
input=query
def getCachedUser(discord_id):
    """Return the cached credential record for a Discord user, or None."""
    try:
        return user_cache[discord_id]
    except KeyError:
        return None
def setCachedUser(discord_id, user_data):
    """Store a user's credential record in the in-memory cache and persist it.

    Persistence is best-effort: saveCache() swallows and logs its own errors.
    """
    user_cache[discord_id] = user_data
    saveCache()
def negotiateToken(discord_id, username, password):
    """Log the user in against the backend API and cache their credentials.

    Parameters:
        discord_id: Discord user id used as the cache key.
        username / password: plaintext credentials forwarded to /api/login.

    Returns:
        (token, user_uuid) on success, (None, None) on any failure.

    The original implementation duplicated the entire login + cache-write
    flow in two branches; the only difference was whether the stored bcrypt
    hash was reused or recomputed. It also crashed with AttributeError when
    a cache entry had no "hashed_password" (verifyPassword on None).
    """
    # Decide up front whether we can reuse the previously stored bcrypt hash:
    # only when the cached record matches this username AND actually holds a
    # hash that verifies against the supplied password.
    reusable_hash = None
    cached = getCachedUser(discord_id)
    if cached and cached.get("username") == username:
        stored_hash = cached.get("hashed_password")
        if stored_hash and verifyPassword(password, stored_hash):
            reusable_hash = stored_hash

    result, status = apiRequest(
        "post", "/api/login", data={"username": username, "password": password}
    )
    if status == 200 and "token" in result:
        token = result["token"]
        # The JWT 'sub' claim carries the backend user UUID.
        payload = decodeJwtPayload(token)
        user_uuid = payload["sub"]
        setCachedUser(
            discord_id,
            {
                # Reuse the verified hash when available; otherwise hash fresh.
                "hashed_password": reusable_hash or hashPassword(password),
                "user_uuid": user_uuid,
                "username": username,
            },
        )
        return token, user_uuid
    return None, None
async def handleAuthFailure(message):
    """Drop the sender's expired session and prompt them to re-authenticate."""
    # pop() with a default is safe even if the session was already removed.
    user_sessions.pop(message.author.id, None)
    await message.channel.send(
        "Your session has expired. Send any message to log in again."
    )
async def handleLoginStep(message):
discord_id = message.author.id
state = login_state[discord_id]
if state["step"] == "username":
state["username"] = message.content.strip()
state["step"] = "password"
await message.channel.send("Password?")
elif state["step"] == "password":
username = state["username"]
password = message.content.strip()
del login_state[discord_id]
token, user_uuid = negotiateToken(discord_id, username, password)
if token and user_uuid:
user_sessions[discord_id] = {
"token": token,
"user_uuid": user_uuid,
"username": username,
}
registered = ", ".join(list_registered()) or "none"
await message.channel.send(
f"Welcome back **{username}**!\n\n"
f"Registered modules: {registered}\n\n"
f"Send 'help' for available commands."
)
else:
await message.channel.send(
"Invalid credentials. Send any message to try again."
) )
query_emb = response.data[0].embedding
async def sendHelpMessage(message): # Search the vector store
help_msg = """**🤖 Synculous Bot - Natural Language Commands** context_chunks = self.vector_store.search(query_emb, top_k=top_k)
Just talk to me naturally! Here are some examples: return context_chunks
**💊 Medications:** except Exception as e:
"add lsd 50 mcg every tuesday at 4:20pm" print(f"Error retrieving context: {e}")
"take my wellbutrin" return []
"what meds do i have today?"
"show my refills"
"snooze my reminder for 30 minutes"
"check adherence"
**📋 Routines:** def generate_answer(self, query, context_chunks):
"create morning routine with brush teeth, shower, eat" print("[2. Generating Answer...]")
"start my morning routine"
"done" (complete current step)
"skip" (skip current step)
"pause/resume" (pause or continue)
"what steps are in my routine?"
"schedule workout for monday wednesday friday at 7am"
"show my stats"
**💡 Tips:** # Build the context string
• I understand natural language, typos, and slang context_text = "\n\n---\n\n".join([chunk['metadata']['text'] for chunk in context_chunks])
• If I'm unsure, I'll ask for clarification
• For important actions, I'll ask you to confirm with "yes" or "no"
• When you're in a routine, shortcuts like "done", "skip", "pause" work automatically"""
await message.channel.send(help_msg)
system_prompt = """You are a helpful AI assistant specializing in DBT (Dialectical Behavior Therapy).
Use the provided context to answer the user's question.
If the answer is not in the context, say you don't know based on the provided text.
Be concise and compassionate."""
async def checkActiveSession(session): user_prompt = f"""Context:
"""Check if user has an active routine session and return details.""" {context_text}
token = session.get("token")
if not token:
return None
resp, status = apiRequest("get", "/api/sessions/active", token) Question: {query}"""
if status == 200 and "session" in resp:
return resp
return None
try:
# Using a strong model for the final generation
response = self.client.chat.completions.create(
model="openai/gpt-4o-mini", # You can change this to "qwen/qwen-3-8b" or similar if desired
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
temperature=0.7
)
async def handleConfirmation(message, session): return response.choices[0].message.content
"""Handle yes/no confirmation responses. Returns True if handled."""
discord_id = message.author.id
user_input = message.content.lower().strip()
if "pending_confirmations" not in session: except Exception as e:
return False return f"Error generating answer: {e}"
# Check for any pending confirmations def process_query(self, query):
pending = session["pending_confirmations"] # 1. Retrieve
if not pending: context = self.retrieve_context(query)
return False
# Get the most recent pending confirmation if not context:
confirmation_id = list(pending.keys())[-1] return "I couldn't find any relevant information in the knowledge base."
confirmation_data = pending[confirmation_id]
if user_input in ("yes", "y", "yeah", "sure", "ok", "confirm"): # Optional: Print sources for debugging
# Execute the confirmed action print(f" Found {len(context)} relevant chunks (Top score: {context[0]['score']:.4f})")
del pending[confirmation_id]
interaction_type = confirmation_data.get("interaction_type") # 2. Generate
handler = get_handler(interaction_type) answer = self.generate_answer(query, context)
if handler: return answer
# Create a fake parsed object for the handler
fake_parsed = confirmation_data.copy()
fake_parsed["needs_confirmation"] = False
await handler(message, session, fake_parsed)
return True
elif user_input in ("no", "n", "nah", "cancel", "abort"): def main():
del pending[confirmation_id] print("Initializing AI Jury System...")
await message.channel.send("❌ Cancelled.") system = JurySystem()
return True
return False print("\nSystem Ready. Ask a question (or type 'exit').")
while True:
try:
user_query = input("\nYou: ").strip()
async def handleActiveSessionShortcuts(message, session, active_session): if user_query.lower() in ['exit', 'quit']:
"""Handle shortcuts like 'done', 'skip', 'next' when in active session.""" print("Goodbye!")
user_input = message.content.lower().strip() break
# Map common shortcuts to actions if not user_query:
shortcuts = { continue
"done": ("routine", "complete"),
"finished": ("routine", "complete"),
"complete": ("routine", "complete"),
"next": ("routine", "complete"),
"skip": ("routine", "skip"),
"pass": ("routine", "skip"),
"pause": ("routine", "pause"),
"hold": ("routine", "pause"),
"resume": ("routine", "resume"),
"continue": ("routine", "resume"),
"stop": ("routine", "cancel"),
"quit": ("routine", "cancel"),
"abort": ("routine", "abort"),
}
if user_input in shortcuts: response = system.process_query(user_query)
interaction_type, action = shortcuts[user_input] print(f"\nAI: {response}")
handler = get_handler(interaction_type)
if handler:
fake_parsed = {"action": action}
await handler(message, session, fake_parsed)
return True
return False
async def routeCommand(message):
    """Route a logged-in user's DM to the right handler.

    Order matters here: help keywords short-circuit everything, then pending
    yes/no confirmations, then single-word routine shortcuts (only while a
    routine session is active), and finally the AI parser + registry dispatch.
    """
    discord_id = message.author.id
    # Caller (on_message) guarantees a session exists for this user.
    session = user_sessions[discord_id]
    user_input = message.content.lower()
    # NOTE(review): substring match, so any message containing "help"
    # (e.g. "help me add a med") triggers the help text — confirm intended.
    if "help" in user_input or "what can i say" in user_input:
        await sendHelpMessage(message)
        return
    # Check for active session first
    active_session = await checkActiveSession(session)
    # Handle confirmation responses
    confirmation_handled = await handleConfirmation(message, session)
    if confirmation_handled:
        return
    # Handle shortcuts when in active session
    if active_session:
        shortcut_handled = await handleActiveSessionShortcuts(
            message, session, active_session
        )
        if shortcut_handled:
            return
    # Show a typing indicator while the (potentially slow) AI parse runs.
    async with message.channel.typing():
        history = message_history.get(discord_id, [])
        # Add context about active session to help AI understand
        context = ""
        if active_session:
            session_data = active_session.get("session", {})
            routine_name = session_data.get("routine_name", "a routine")
            # API step index is 0-based; show the user a 1-based step number.
            current_step = session_data.get("current_step_index", 0) + 1
            total_steps = active_session.get("total_steps", 0)
            context = f"\n[Context: User is currently in active session for '{routine_name}', on step {current_step} of {total_steps}. They can say 'done', 'skip', 'pause', 'resume', or 'stop'.]"
        parsed = await ai_parser.parse(
            message.content + context, "command_parser", history=history
        )
        # Keep only the last 5 (message, parsed) pairs per user as AI context.
        if discord_id not in message_history:
            message_history[discord_id] = []
        message_history[discord_id].append((message.content, parsed))
        message_history[discord_id] = message_history[discord_id][-5:]
        if "needs_clarification" in parsed:
            await message.channel.send(
                f"I'm not quite sure what you mean. {parsed['needs_clarification']}"
            )
            return
        if "error" in parsed:
            await message.channel.send(
                f"I had trouble understanding that: {parsed['error']}"
            )
            return
        # Dispatch to whichever module registered this interaction type.
        interaction_type = parsed.get("interaction_type")
        handler = get_handler(interaction_type)
        if handler:
            await handler(message, session, parsed)
        else:
            registered = ", ".join(list_registered()) or "none"
            await message.channel.send(
                f"Unknown command type '{interaction_type}'. Registered modules: {registered}"
            )
@client.event
async def on_ready():
    """Startup hook: announce login, warm the credential cache, begin polling."""
    print("Bot logged in as {}".format(client.user))
    loadCache()
    backgroundLoop.start()
@client.event
async def on_message(message):
    """DM-only dispatcher: skip self/guild traffic, walk the login flow, then route."""
    # Ignore our own messages and anything outside a direct-message channel.
    if message.author == client.user or not isinstance(
        message.channel, discord.DMChannel
    ):
        return
    discord_id = message.author.id
    if discord_id in login_state:
        # Mid-login: this message is the next step (username or password).
        await handleLoginStep(message)
    elif discord_id not in user_sessions:
        # No session yet: kick off the login conversation.
        login_state[discord_id] = {"step": "username"}
        await message.channel.send("Welcome! Send your username to log in.")
    else:
        await routeCommand(message)
@tasks.loop(seconds=60)
async def backgroundLoop():
    """Once-a-minute polling hook; intentionally a no-op in the base bot.

    Domain modules are expected to override or extend this loop.
    """
@backgroundLoop.before_loop
async def beforeBackgroundLoop():
    """Hold the polling loop until the Discord gateway connection is ready."""
    await client.wait_until_ready()
except KeyboardInterrupt:
print("\nGoodbye!")
break
except Exception as e:
print(f"\nAn error occurred: {e}")
if __name__ == "__main__": if __name__ == "__main__":
client.run(DISCORD_BOT_TOKEN) main()

View File

@@ -53,9 +53,24 @@ def load_knowledge_base(
with open(file_path, "r") as f: with open(file_path, "r") as f:
data = json.load(f) data = json.load(f)
chunks = data.get("chunks", []) # Handle both dict format {"chunks": [...], "embeddings": [...], "metadata": {...}}
embeddings = data.get("embeddings", []) # and legacy list format where data is just the chunks
metadata = data.get("metadata", {}) if isinstance(data, dict):
chunks = data.get("chunks", [])
embeddings = data.get("embeddings", [])
metadata = data.get("metadata", {})
elif isinstance(data, list):
# Legacy format: assume it's just chunks, or list of [chunk, embedding] pairs
if data and isinstance(data[0], dict) and "text" in data[0]:
# Format: [{"text": "...", "embedding": [...]}, ...]
chunks = [item.get("text", "") for item in data]
embeddings = [item.get("embedding", []) for item in data]
metadata = {"format": "legacy_list_of_dicts"}
else:
# Unknown list format - can't process
return None
else:
return None
# Add file_path to metadata for reference # Add file_path to metadata for reference
metadata["_file_path"] = file_path metadata["_file_path"] = file_path