"""
|
|
parser.py - LLM-powered JSON parser with retry and validation
|
|
|
|
Config-driven via ai_config.json. Supports:
|
|
- Any OpenAI-compatible API (OpenRouter, local, etc.)
|
|
- Reasoning models that output in reasoning field
|
|
- Schema validation with automatic retry
|
|
- Conversation context for multi-turn interactions
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import re
|
|
from openai import OpenAI
|
|
|
|
CONFIG_PATH = os.environ.get(
|
|
"AI_CONFIG_PATH", os.path.join(os.path.dirname(__file__), "ai_config.json")
|
|
)
|
|
|
|
with open(CONFIG_PATH, "r") as f:
|
|
AI_CONFIG = json.load(f)
|
|
|
|
client = OpenAI(
|
|
api_key=os.getenv("OPENROUTER_API_KEY"),
|
|
base_url=os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"),
|
|
)
|
|
|
|
|
|
def _extract_json_from_text(text):
|
|
"""Pull the first JSON object out of a block of text (for reasoning models)."""
|
|
match = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL)
|
|
if match:
|
|
return match.group(1)
|
|
match = re.search(r"(\{[^{}]*\})", text, re.DOTALL)
|
|
if match:
|
|
return match.group(1)
|
|
return None
|
|
|
|
|
|
def _call_llm(system_prompt, user_prompt):
|
|
"""Call OpenAI-compatible API and return the response text."""
|
|
try:
|
|
response = client.chat.completions.create(
|
|
model=AI_CONFIG["model"],
|
|
max_tokens=AI_CONFIG.get("max_tokens", 8192),
|
|
timeout=AI_CONFIG["validation"]["timeout_seconds"],
|
|
messages=[
|
|
{"role": "system", "content": system_prompt},
|
|
{"role": "user", "content": user_prompt},
|
|
],
|
|
)
|
|
msg = response.choices[0].message
|
|
text = msg.content.strip() if msg.content else ""
|
|
if text:
|
|
return text
|
|
reasoning = getattr(msg, "reasoning", None)
|
|
if reasoning:
|
|
extracted = _extract_json_from_text(reasoning)
|
|
if extracted:
|
|
return extracted
|
|
return None
|
|
except Exception as e:
|
|
print(f"LLM error: {type(e).__name__}: {e}", flush=True)
|
|
return None
|
|
|
|
|
|
def parse(user_input, interaction_type, retry_count=0, errors=None, history=None):
|
|
"""
|
|
Parse user input into structured JSON using LLM.
|
|
|
|
Args:
|
|
user_input: The raw user message
|
|
interaction_type: Key in ai_config.json prompts (e.g., 'command_parser')
|
|
retry_count: Internal retry counter
|
|
errors: Previous validation errors for retry
|
|
history: List of (user_msg, parsed_result) tuples for context
|
|
|
|
Returns:
|
|
dict: Parsed JSON or error dict
|
|
"""
|
|
if retry_count >= AI_CONFIG["validation"]["max_retries"]:
|
|
return {
|
|
"error": f"Failed to parse after {retry_count} retries",
|
|
"user_input": user_input,
|
|
}
|
|
|
|
prompt_config = AI_CONFIG["prompts"].get(interaction_type)
|
|
if not prompt_config:
|
|
return {
|
|
"error": f"Unknown interaction type: {interaction_type}",
|
|
"user_input": user_input,
|
|
}
|
|
|
|
history_context = "No previous context"
|
|
if history and len(history) > 0:
|
|
history_lines = []
|
|
for i, (msg, result) in enumerate(history[-3:]):
|
|
history_lines.append(f"{i + 1}. User: {msg}")
|
|
if isinstance(result, dict) and not result.get("error"):
|
|
history_lines.append(f" Parsed: {json.dumps(result)}")
|
|
else:
|
|
history_lines.append(f" Parsed: {result}")
|
|
history_context = "\n".join(history_lines)
|
|
|
|
user_prompt = prompt_config["user_template"].format(
|
|
user_input=user_input, history_context=history_context
|
|
)
|
|
|
|
if errors:
|
|
user_prompt += (
|
|
f"\n\nPrevious attempt had errors: {errors}\nPlease fix and try again."
|
|
)
|
|
|
|
response_text = _call_llm(prompt_config["system"], user_prompt)
|
|
if not response_text:
|
|
return {"error": "AI service unavailable", "user_input": user_input}
|
|
|
|
try:
|
|
parsed = json.loads(response_text)
|
|
except json.JSONDecodeError:
|
|
return parse(
|
|
user_input,
|
|
interaction_type,
|
|
retry_count + 1,
|
|
["Response was not valid JSON"],
|
|
history=history,
|
|
)
|
|
|
|
if "error" in parsed:
|
|
return parsed
|
|
|
|
validator = AI_CONFIG["validation"].get("validators", {}).get(interaction_type)
|
|
if validator:
|
|
validation_errors = validator(parsed)
|
|
if validation_errors:
|
|
return parse(
|
|
user_input,
|
|
interaction_type,
|
|
retry_count + 1,
|
|
validation_errors,
|
|
history=history,
|
|
)
|
|
|
|
return parsed
|
|
|
|
|
|
def register_validator(interaction_type, validator_fn):
|
|
"""Register a custom validation function for an interaction type."""
|
|
if "validators" not in AI_CONFIG["validation"]:
|
|
AI_CONFIG["validation"]["validators"] = {}
|
|
AI_CONFIG["validation"]["validators"][interaction_type] = validator_fn
|