Fix knowledge base loader to handle list format embeddings

Handle both dict and list formats in the load_knowledge_base function to fix the AttributeError raised when the loaded JSON root is a list instead of a dict.
This commit is contained in:
2026-02-16 19:06:31 -06:00
parent 832c1e1a23
commit b1bb05e879
2 changed files with 191 additions and 390 deletions

View File

@@ -53,9 +53,24 @@ def load_knowledge_base(
with open(file_path, "r") as f:
data = json.load(f)
chunks = data.get("chunks", [])
embeddings = data.get("embeddings", [])
metadata = data.get("metadata", {})
# Handle both dict format {"chunks": [...], "embeddings": [...], "metadata": {...}}
# and legacy list format [{"text": "...", "embedding": [...]}, ...]
if isinstance(data, dict):
chunks = data.get("chunks", [])
embeddings = data.get("embeddings", [])
metadata = data.get("metadata", {})
elif isinstance(data, list):
# Legacy format: only a list of dicts that carry a "text" key is supported; any other list shape is rejected below
if data and isinstance(data[0], dict) and "text" in data[0]:
# Format: [{"text": "...", "embedding": [...]}, ...]
chunks = [item.get("text", "") for item in data]
embeddings = [item.get("embedding", []) for item in data]
metadata = {"format": "legacy_list_of_dicts"}
else:
# Unknown list format - can't process
return None
else:
return None
# Add file_path to metadata for reference
metadata["_file_path"] = file_path