File size: 1,850 Bytes
b25beac 7329ecf b25beac 7329ecf b25beac 7329ecf b25beac 7329ecf b25beac 7329ecf b25beac 7329ecf b25beac 7329ecf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
"""
Gemini helper · resilient version
• Tries the newest "gemini-1.5-flash" first (fast, 128k ctx)
• Falls back to "gemini-pro" (32k ctx)
• If the SDK returns 404 / PERMISSION_DENIED the call degrades
gracefully to an empty string so orchestrator can switch to OpenAI.
"""
import os, asyncio, google.generativeai as genai
from google.api_core import exceptions as gexc
GEN_KEY = os.getenv("GEMINI_KEY") # set in HF “Secrets”
if GEN_KEY:
genai.configure(api_key=GEN_KEY)
# cache models to avoid re-instantiation per request
_MODELS = {}


def _get_model(name: str):
    """Return the cached GenerativeModel for *name*, creating it on first use."""
    try:
        return _MODELS[name]
    except KeyError:
        model = genai.GenerativeModel(name)
        _MODELS[name] = model
        return model
async def _generate(prompt: str, model_name: str, temp: float = 0.3) -> str:
    """Run a blocking ``generate_content`` call in a worker thread.

    Args:
        prompt: Full prompt text to send to the model.
        model_name: Gemini model identifier (e.g. ``"gemini-1.5-flash"``).
        temp: Sampling temperature passed via ``generation_config``.

    Returns:
        The stripped response text, or ``""`` when the model is
        unavailable (404 / permission denied) or the response carries
        no text part (e.g. blocked by safety filters), so the caller
        can fall back to another model/provider.
    """
    try:
        rsp = await asyncio.to_thread(
            _get_model(model_name).generate_content,
            prompt,
            generation_config={"temperature": temp},
        )
        # rsp.text raises ValueError when the response contains no valid
        # Part (prompt or candidate blocked) — handled below as "no answer".
        return rsp.text.strip()
    except (gexc.NotFound, gexc.PermissionDenied, ValueError):
        # return "" (not None) so the orchestrator can decide to fall back
        return ""
# ---------- public wrappers ----------
async def gemini_summarize(text: str, words: int = 150) -> str:
    """Summarize *text* (truncated to 12k chars) in at most *words* words.

    Tries gemini-1.5-flash first, falling back to gemini-pro when the
    first attempt yields an empty result. May return "" if both fail.
    """
    prompt = f"Summarize in ≤{words} words:\n{text[:12000]}"
    # prefer Flash; only pay for a Pro call when Flash produced nothing
    summary = await _generate(prompt, "gemini-1.5-flash")
    return summary if summary else await _generate(prompt, "gemini-pro")
async def gemini_qa(question: str, context: str = "") -> str:
    """Answer *question* concisely using *context* (truncated to 10k chars).

    Tries gemini-1.5-flash first, then gemini-pro; if both produce
    nothing, returns a fixed "could not answer" message instead of "".
    """
    prompt = (
        "Use the context to answer concisely.\n\n"
        f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
    )
    answer = await _generate(prompt, "gemini-1.5-flash")
    if not answer:
        answer = await _generate(prompt, "gemini-pro")
    if answer:
        return answer
    return "Gemini could not answer (model/key unavailable)."
|