diff --git a/ai/parser.py b/ai/parser.py
index fee4670..0d98ec2 100644
--- a/ai/parser.py
+++ b/ai/parser.py
@@ -61,7 +61,7 @@ def _call_llm_sync(system_prompt, user_prompt):
             return extracted
         return None
     except Exception as e:
-        print(f"LLM error: {type(e).__name__}: {e}", flush=True)
+        print(f"LLM error ({AI_CONFIG['model']}): {type(e).__name__}: {e}", flush=True)
         return None
 
 
@@ -119,7 +119,7 @@ async def parse(user_input, interaction_type, retry_count=0, errors=None, histor
     response_text = await _call_llm(prompt_config["system"], user_prompt)
 
     if not response_text:
-        return {"error": "AI service unavailable", "user_input": user_input}
+        return {"error": f"AI service unavailable (model: {AI_CONFIG['model']})", "user_input": user_input}
 
     try:
         parsed = json.loads(response_text)
diff --git a/bot/commands/knowledge.py b/bot/commands/knowledge.py
index 6432d5b..a31989f 100644
--- a/bot/commands/knowledge.py
+++ b/bot/commands/knowledge.py
@@ -17,10 +17,16 @@ from ai.parser import client
 
 # Configuration
 EPUBS_DIRECTORY = os.getenv("KNOWLEDGE_EMBEDDINGS_DIR", "./bot/data")
 TOP_K_CHUNKS = 5
-EMBEDDING_MODEL = "sentence-transformers/all-minilm-l12-v2"
 CHAT_MODEL = "deepseek/deepseek-v3.2"
 EMBEDDING_EXTENSION = ".embeddings.json"
 
+# Map embedding dimensions to the model that produced them
+EMBEDDING_MODELS_BY_DIM = {
+    384: "sentence-transformers/all-minilm-l12-v2",
+    4096: "qwen/qwen3-embedding-8b",
+}
+DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-minilm-l12-v2"
+
 # Cache for loaded embeddings: {file_path: (chunks, embeddings, metadata)}
 _knowledge_cache: Dict[str, Tuple[List[str], List[List[float]], dict]] = {}
@@ -79,9 +85,14 @@ def load_knowledge_base(
     return _knowledge_cache[file_path]
 
 
-def get_query_embedding(query: str) -> List[float]:
+def get_embedding_model_for_dim(dim: int) -> str:
+    """Get the correct embedding model for a given dimension."""
+    return EMBEDDING_MODELS_BY_DIM.get(dim, DEFAULT_EMBEDDING_MODEL)
+
+
+def get_query_embedding(query: str, model: str = DEFAULT_EMBEDDING_MODEL) -> List[float]:
     """Embed the user's question via OpenRouter."""
-    response = client.embeddings.create(model=EMBEDDING_MODEL, input=query)
+    response = client.embeddings.create(model=model, input=query)
     return response.data[0].embedding
 
 
@@ -271,8 +282,12 @@ async def handle_knowledge(message, session, parsed):
     await message.channel.send(f"🔍 Searching **{book_title}**...")
 
     try:
+        # Detect embedding dimension and use matching model
+        emb_dim = len(embeddings[0]) if embeddings else 384
+        embedding_model = get_embedding_model_for_dim(emb_dim)
+
         # Get query embedding and search
-        query_emb = get_query_embedding(query)
+        query_emb = get_query_embedding(query, model=embedding_model)
         relevant_chunks, scores = search_context(
             query_emb, chunks, embeddings, TOP_K_CHUNKS
         )