mgbam commited on
Commit
7329ecf
·
verified ·
1 Parent(s): 7117e78

Update mcp/gemini.py

Browse files
Files changed (1) hide show
  1. mcp/gemini.py +45 -17
mcp/gemini.py CHANGED
@@ -1,29 +1,57 @@
1
- # mcp/gemini.py
2
  """
3
- Lightweight Gemini-Pro helper (text in → text out).
4
- Requires env var GEMINI_KEY.
 
 
 
 
5
  """
6
 
7
- import os, asyncio, google.generativeai as genai # SDK :contentReference[oaicite:1]{index=1}
 
8
 
9
- GEN_KEY = os.getenv("GEMINI_KEY")
10
  if GEN_KEY:
11
  genai.configure(api_key=GEN_KEY)
12
 
13
- _MODEL = None
14
- def _model():
15
- global _MODEL
16
- if _MODEL is None:
17
- _MODEL = genai.GenerativeModel("gemini-pro") # 32 k ctx ­ :contentReference[oaicite:2]{index=2}
18
- return _MODEL
 
 
19
 
20
- # ---------- public helpers ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  async def gemini_summarize(text: str, words: int = 150) -> str:
22
  prompt = f"Summarize in ≤{words} words:\n{text[:12000]}"
23
- rsp = await asyncio.to_thread(_model().generate_content, prompt)
24
- return rsp.text
 
 
 
 
25
 
26
  async def gemini_qa(question: str, context: str = "") -> str:
27
- prompt = f"Answer briefly.\nContext:\n{context[:10000]}\n\nQ: {question}\nA:"
28
- rsp = await asyncio.to_thread(_model().generate_content, prompt)
29
- return rsp.text
 
 
 
 
 
 
 
1
  """
2
+ Gemini helper · resilient version
3
+
4
+ • Tries the newest "gemini-1.5-flash" first (fast, 128k ctx)
5
+ • Falls back to "gemini-pro" (32k ctx)
6
+ • If the SDK returns 404 / PERMISSION_DENIED the call degrades
7
+ gracefully to an empty string so orchestrator can switch to OpenAI.
8
  """
9
 
10
+ import os, asyncio, google.generativeai as genai
11
+ from google.api_core import exceptions as gexc
12
 
13
+ GEN_KEY = os.getenv("GEMINI_KEY") # set in HF “Secrets”
14
  if GEN_KEY:
15
  genai.configure(api_key=GEN_KEY)
16
 
17
# Model cache — one GenerativeModel instance per model name, reused across requests.
_MODELS = {}


def _get_model(name: str):
    """Return the cached GenerativeModel for *name*, creating it on first use."""
    try:
        return _MODELS[name]
    except KeyError:
        _MODELS[name] = genai.GenerativeModel(name)
        return _MODELS[name]
24
+
25
 
26
async def _generate(prompt: str, model_name: str, temp: float = 0.3) -> str:
    """Run one Gemini generation call in a worker thread.

    Parameters
    ----------
    prompt : str
        Full prompt text sent to the model.
    model_name : str
        Gemini model identifier, e.g. ``"gemini-1.5-flash"``.
    temp : float
        Sampling temperature (default 0.3 — mostly deterministic output).

    Returns
    -------
    str
        Stripped response text, or ``""`` when the model is unavailable
        (404 / permission denied) or the reply carries no usable text, so
        the caller can fall back to another model or provider.
    """
    try:
        rsp = await asyncio.to_thread(
            _get_model(model_name).generate_content,
            prompt,
            generation_config={"temperature": temp},
        )
        # rsp.text raises ValueError when the candidate was safety-blocked
        # or contains no text Part — treat that the same as "no answer".
        return rsp.text.strip()
    except (gexc.NotFound, gexc.PermissionDenied, ValueError):
        # Return "" (not None) so the orchestrator can detect failure with
        # a simple truthiness check and switch to OpenAI.
        return ""
37
+
38
+
39
+ # ---------- public wrappers ----------
40
async def gemini_summarize(text: str, words: int = 150) -> str:
    """Summarize *text* in at most *words* words via Gemini.

    Tries "gemini-1.5-flash" first, then falls back to "gemini-pro".
    Returns "" when neither model produced text, so the orchestrator can
    switch providers.
    """
    prompt = f"Summarize in ≤{words} words:\n{text[:12000]}"
    # Preference order: fast, large-context Flash first, classic Pro second.
    for candidate in ("gemini-1.5-flash", "gemini-pro"):
        summary = await _generate(prompt, candidate)
        if summary:
            return summary
    return ""
47
+
48
 
49
async def gemini_qa(question: str, context: str = "") -> str:
    """Answer *question* concisely using *context* via Gemini.

    Tries "gemini-1.5-flash", then "gemini-pro"; if both come back empty
    a fixed diagnostic string is returned instead.
    """
    prompt = (
        "Use the context to answer concisely.\n\n"
        f"Context:\n{context[:10000]}\n\nQ: {question}\nA:"
    )
    for model_id in ("gemini-1.5-flash", "gemini-pro"):
        if reply := await _generate(prompt, model_id):
            return reply
    return "Gemini could not answer (model/key unavailable)."