"""
Gemini helper · resilient version

• Tries the newest "gemini-1.5-flash" first (fast, 128k ctx)
• Falls back to "gemini-pro" (32k ctx)
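• If the SDK raises NotFound (404) or PermissionDenied, the low-level call
  degrades gracefully to an empty string; gemini_summarize passes that
  through so the orchestrator can switch to OpenAI, while gemini_qa
  returns a fixed fallback message instead.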
"""

import os, asyncio, google.generativeai as genai
from google.api_core import exceptions as gexc

# API key is read from the environment (set in HF "Secrets"). The SDK is
# configured only when a non-empty key is present.
GEN_KEY = os.environ.get("GEMINI_KEY")
if GEN_KEY:
    genai.configure(api_key=GEN_KEY)

# Model objects are memoised by name so each one is built at most once.
_MODELS = {}


def _get_model(name: str):
    """Return the cached ``GenerativeModel`` for *name*, creating it on first use."""
    if name in _MODELS:
        return _MODELS[name]
    model = genai.GenerativeModel(name)
    _MODELS[name] = model
    return model


async def _generate(prompt: str, model_name: str, temp: float = 0.3) -> str:
    """Run one Gemini completion in a worker thread and return its text.

    The blocking ``generate_content`` call is dispatched through
    ``asyncio.to_thread`` so the event loop is not blocked.

    Args:
        prompt: Full prompt text sent to the model.
        model_name: Model identifier handed to ``_get_model``.
        temp: Sampling temperature placed in the generation config.

    Returns:
        The stripped response text, or ``""`` when the model is not found,
        permission is denied, or the response carries no usable text.
        Callers treat the empty string as "try something else". Any other
        SDK error propagates unchanged.
    """
    try:
        rsp = await asyncio.to_thread(
            _get_model(model_name).generate_content,
            prompt,
            generation_config={"temperature": temp},
        )
    except (gexc.NotFound, gexc.PermissionDenied):
        # Empty string (not None) is the "unavailable" signal callers test.
        return ""

    try:
        text = rsp.text
    except ValueError:
        # The SDK's ``.text`` accessor raises ValueError when the response
        # has no valid text part (e.g. a blocked prompt or candidate).
        # Treat that like an unavailable model so the fallback chain runs.
        return ""
    return text.strip()


# ---------- public wrappers ----------
async def gemini_summarize(text: str, words: int = 150) -> str:
    """Summarize *text* in at most *words* words using Gemini.

    Only the first 12000 characters of *text* are placed in the prompt.
    Models are tried in order (Flash, then Pro); the first non-empty
    result wins.

    Returns:
        The summary, or whatever the last attempted model returned (an
        empty string when ``_generate`` reports both as unavailable).
    """
    excerpt = text[:12000]
    prompt = f"Summarize in ≤{words} words:\n{excerpt}"

    summary = ""
    for model_name in ("gemini-1.5-flash", "gemini-pro"):
        summary = await _generate(prompt, model_name)
        if summary:
            break
    return summary


async def gemini_qa(question: str, context: str = "") -> str:
    """Answer *question* with Gemini, optionally grounded in *context*.

    Only the first 10000 characters of *context* are placed in the prompt.
    Models are tried in order (Flash, then Pro); the first non-empty
    answer is returned.

    Returns:
        The model's answer, or a fixed fallback message when neither model
        produced any text.
    """
    clipped_context = context[:10000]
    prompt = (
        "Use the context to answer concisely.\n\n"
        f"Context:\n{clipped_context}\n\nQ: {question}\nA:"
    )

    for model_name in ("gemini-1.5-flash", "gemini-pro"):
        answer = await _generate(prompt, model_name)
        if answer:
            return answer
    return "Gemini could not answer (model/key unavailable)."