Fix yesterday's schedule blocking today's
This commit is contained in:
56
regenerate_embeddings.py
Normal file
56
regenerate_embeddings.py
Normal file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Regenerate DBT embeddings with qwen/qwen3-embedding-8b model (384 dimensions)"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from openai import OpenAI
|
||||
import time
|
||||
|
||||
# Load config
# JSON files are decoded explicitly as UTF-8 rather than with the platform's
# locale encoding, so non-ASCII values parse the same way on every machine.
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)
|
||||
|
||||
# Build the OpenAI SDK client, pointed at the OpenRouter endpoint and
# authenticated with the "openrouter_api_key" entry of the loaded config.
client = OpenAI(
    api_key=config["openrouter_api_key"],
    base_url="https://openrouter.ai/api/v1",
)
|
||||
|
||||
# Load text data
# The knowledge chunks are free text and may contain non-ASCII characters, so
# the file is decoded explicitly as UTF-8 instead of the locale default.
with open("bot/data/dbt_knowledge.text.json", "r", encoding="utf-8") as f:
    text_data = json.load(f)

print(f"Regenerating embeddings for {len(text_data)} chunks...")
|
||||
|
||||
# Generate embeddings
# One request is sent per chunk. A failure on one chunk is reported and the
# chunk is skipped so that the remaining chunks are still processed.
# NOTE(review): the module docstring mentions 384 dimensions, but no
# `dimensions` argument is passed here, so the vector size is whatever the
# endpoint returns by default -- confirm this matches what the bot expects.
embeddings_data = []
failed_indices = []
for i, item in enumerate(text_data):
    try:
        response = client.embeddings.create(
            model="qwen/qwen3-embedding-8b",
            input=item["text"],
        )
        embeddings_data.append({
            "id": item["id"],
            "source": item["source"],
            "text": item["text"],
            "embedding": response.data[0].embedding,
        })
    except Exception as e:
        # Deliberately broad: this is a best-effort batch job, and any
        # per-item problem (API error, missing key) should not abort the run.
        print(f"Error processing item {i}: {e}")
        failed_indices.append(i)
    else:
        if (i + 1) % 10 == 0:
            print(f"Processed {i + 1}/{len(text_data)} chunks...")

    # Small delay to avoid rate limits. It runs after failures as well, so a
    # streak of errors does not turn into back-to-back requests.
    time.sleep(0.1)

if failed_indices:
    # Make skipped chunks visible; they are absent from the saved output.
    print(f"Warning: {len(failed_indices)} of {len(text_data)} chunks failed (indices: {failed_indices})")
|
||||
|
||||
# Save new embeddings
# Stop before opening the output file when nothing was generated: writing
# would replace any existing embeddings with an empty list, and the summary
# below would then fail with IndexError on embeddings_data[0].
if not embeddings_data:
    raise SystemExit(
        "No embeddings were generated; "
        "bot/data/dbt_knowledge.embeddings.json was left untouched"
    )

with open("bot/data/dbt_knowledge.embeddings.json", "w", encoding="utf-8") as f:
    json.dump(embeddings_data, f)

# The first vector's length is reported as the dimensionality (this assumes
# every returned vector has the same length).
print(f"\nDone! Generated {len(embeddings_data)} embeddings with {len(embeddings_data[0]['embedding'])} dimensions")
|
||||
Reference in New Issue
Block a user