#!/usr/bin/env python3 """Regenerate DBT embeddings with qwen/qwen3-embedding-8b model (384 dimensions)""" import json import os from openai import OpenAI import time # Load config with open("config.json", "r") as f: config = json.load(f) # Initialize OpenAI client with OpenRouter client = OpenAI( base_url="https://openrouter.ai/api/v1", api_key=config["openrouter_api_key"], ) # Load text data with open("bot/data/dbt_knowledge.text.json", "r") as f: text_data = json.load(f) print(f"Regenerating embeddings for {len(text_data)} chunks...") # Generate embeddings embeddings_data = [] for i, item in enumerate(text_data): try: response = client.embeddings.create( model="qwen/qwen3-embedding-8b", input=item["text"] ) embedding = response.data[0].embedding embeddings_data.append({ "id": item["id"], "source": item["source"], "text": item["text"], "embedding": embedding }) if (i + 1) % 10 == 0: print(f"Processed {i + 1}/{len(text_data)} chunks...") # Small delay to avoid rate limits time.sleep(0.1) except Exception as e: print(f"Error processing item {i}: {e}") continue # Save new embeddings with open("bot/data/dbt_knowledge.embeddings.json", "w") as f: json.dump(embeddings_data, f) print(f"\nDone! Generated {len(embeddings_data)} embeddings with {len(embeddings_data[0]['embedding'])} dimensions")