"""
Gemini helper · resilient version

• Tries "gemini-1.5-flash" first (fast)
• Falls back to "gemini-pro"
• If the SDK returns 404 / PERMISSION_DENIED, or the response carries no
  usable text (e.g. it was blocked), the call degrades gracefully to an
  empty string so the orchestrator can switch to OpenAI.
"""
import asyncio
import os

import google.generativeai as genai
from google.api_core import exceptions as gexc

GEN_KEY = os.getenv("GEMINI_KEY")  # set in HF “Secrets”
if GEN_KEY:
    genai.configure(api_key=GEN_KEY)

# Models are tried in this order; the first non-empty answer wins.
_MODEL_CHAIN = ("gemini-1.5-flash", "gemini-pro")

# cache models to avoid re-instantiation per request
_MODELS = {}


def _get_model(name: str):
    """Return the cached ``GenerativeModel`` for *name*, creating it on first use."""
    if name not in _MODELS:
        _MODELS[name] = genai.GenerativeModel(name)
    return _MODELS[name]


async def _generate(prompt: str, model_name: str, temp: float = 0.3) -> str:
    """Run *prompt* against *model_name* and return the stripped reply text.

    The blocking SDK call is executed in a worker thread via
    ``asyncio.to_thread`` so the event loop is not stalled.

    Args:
        prompt: Full prompt text sent to the model.
        model_name: Gemini model identifier, e.g. ``"gemini-1.5-flash"``.
        temp: Sampling temperature passed in ``generation_config``.

    Returns:
        The reply text with surrounding whitespace removed, or ``""`` when
        the model is not found, access is denied, or the response contains
        no usable text. Other SDK errors propagate to the caller.
    """
    try:
        rsp = await asyncio.to_thread(
            _get_model(model_name).generate_content,
            prompt,
            generation_config={"temperature": temp},
        )
    except (gexc.NotFound, gexc.PermissionDenied):
        # return "" (not None) so callers can simply test truthiness and
        # decide whether to fall back
        return ""

    try:
        # The ``.text`` accessor raises ValueError when the response has no
        # valid text part (for instance a blocked prompt); treat that the
        # same as "no answer" instead of crashing the caller.
        reply = rsp.text
    except ValueError:
        return ""
    return reply.strip()


async def _generate_with_fallback(prompt: str) -> str:
    """Try each model in ``_MODEL_CHAIN`` and return the first non-empty reply.

    Returns ``""`` when every model yields an empty result.
    """
    for model_name in _MODEL_CHAIN:
        out = await _generate(prompt, model_name)
        if out:
            return out
    return ""


# ---------- public wrappers ----------
async def gemini_summarize(text: str, words: int = 150) -> str:
    """Summarize *text* in at most *words* words.

    Only the first 12000 characters of *text* are included in the prompt.

    Returns:
        The summary, or ``""`` if no model produced one.
    """
    prompt = f"Summarize in ≤{words} words:\n{text[:12000]}"
    # try Flash first → Pro second
    return await _generate_with_fallback(prompt)


async def gemini_qa(question: str, context: str = "") -> str:
    """Answer *question* concisely, using *context* as supporting material.

    Only the first 10000 characters of *context* are included in the prompt.

    Returns:
        The model's answer, or a fixed fallback message when no model
        produced one.
    """
    prompt = (
        "Use the context to answer concisely.\n\n"
        f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
    )
    out = await _generate_with_fallback(prompt)
    return out or "Gemini could not answer (model/key unavailable)."