mgbam committed
Commit 256d4e5 · verified · 1 Parent(s): 10fdd4a

Update core/llm_clients.py

Files changed (1)
  1. core/llm_clients.py +61 -129
core/llm_clients.py CHANGED
@@ -2,193 +2,125 @@
 import os
 import google.generativeai as genai
 from huggingface_hub import InferenceClient
-import time # For potential retries or delays
+import time
 
 # --- Configuration ---
-# These will be populated by os.getenv()
 GOOGLE_API_KEY = None
 HF_TOKEN = None
-
-# Status flags, default to False
 GEMINI_API_CONFIGURED = False
 HF_API_CONFIGURED = False
-
-# Client instances
 hf_inference_client = None
-# google_gemini_model_instances cache is not strictly necessary as genai.GenerativeModel is light.
-# Removing it for now to simplify, can be added back if model instantiation proves slow.
 
-# --- Initialization Function (to be called from app.py's global scope) ---
 def initialize_all_clients():
     global GOOGLE_API_KEY, HF_TOKEN, GEMINI_API_CONFIGURED, HF_API_CONFIGURED, hf_inference_client
-
     print("INFO: llm_clients.py - Attempting to initialize all API clients...")
-
-    # Google Gemini
     GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-    if GOOGLE_API_KEY and GOOGLE_API_KEY.strip(): # Check if key is not None and not just whitespace
-        print("INFO: llm_clients.py - GOOGLE_API_KEY found in environment.")
+    if GOOGLE_API_KEY and GOOGLE_API_KEY.strip():
+        print("INFO: llm_clients.py - GOOGLE_API_KEY found.")
         try:
-            # Test configuration by making a very simple, non-resource-intensive call
-            # or by listing models if supported and cheap.
-            # For now, genai.configure() is the main check.
             genai.configure(api_key=GOOGLE_API_KEY)
-            # Optionally, try to list models or a similar lightweight check if genai.configure isn't enough
-            # models = [m for m in genai.list_models() if 'generateContent' in m.supported_generation_methods]
-            # if not models:
-            #     raise Exception("No usable Gemini models found with this API key, or API not fully enabled.")
             GEMINI_API_CONFIGURED = True
-            print("SUCCESS: llm_clients.py - Google Gemini API configured successfully.")
+            print("SUCCESS: llm_clients.py - Google Gemini API configured.")
         except Exception as e:
             GEMINI_API_CONFIGURED = False
-            print(f"ERROR: llm_clients.py - Failed to configure/validate Google Gemini API. Key value might be invalid, API not enabled in Google Cloud, or other issue.")
-            print(f" Gemini Init Error Details: {type(e).__name__}: {e}")
+            print(f"ERROR: llm_clients.py - Failed to configure Google Gemini API: {type(e).__name__}: {e}")
     else:
-        GEMINI_API_CONFIGURED = False # Explicitly set if key is missing/empty
-        print("WARNING: llm_clients.py - GOOGLE_API_KEY not found or is empty in environment variables.")
+        GEMINI_API_CONFIGURED = False
+        print("WARNING: llm_clients.py - GOOGLE_API_KEY not found or empty.")
 
-    # Hugging Face
     HF_TOKEN = os.getenv("HF_TOKEN")
-    if HF_TOKEN and HF_TOKEN.strip(): # Check if token is not None and not just whitespace
-        print("INFO: llm_clients.py - HF_TOKEN found in environment.")
+    if HF_TOKEN and HF_TOKEN.strip():
+        print("INFO: llm_clients.py - HF_TOKEN found.")
         try:
             hf_inference_client = InferenceClient(token=HF_TOKEN)
-            # Optionally, you could try a very quick ping to a known small public model if client init isn't enough
-            # hf_inference_client.text_generation("ping", model="gpt2", max_new_tokens=1)
             HF_API_CONFIGURED = True
-            print("SUCCESS: llm_clients.py - Hugging Face InferenceClient initialized successfully.")
+            print("SUCCESS: llm_clients.py - Hugging Face InferenceClient initialized.")
         except Exception as e:
             HF_API_CONFIGURED = False
-            print(f"ERROR: llm_clients.py - Failed to initialize Hugging Face InferenceClient. Token might be invalid or other issue.")
-            print(f" HF Init Error Details: {type(e).__name__}: {e}")
-            hf_inference_client = None # Ensure client is None on failure
+            print(f"ERROR: llm_clients.py - Failed to initialize HF InferenceClient: {type(e).__name__}: {e}")
+            hf_inference_client = None
     else:
-        HF_API_CONFIGURED = False # Explicitly set if token is missing/empty
-        print("WARNING: llm_clients.py - HF_TOKEN not found or is empty in environment variables.")
-
-    print(f"INFO: llm_clients.py - Initialization complete. Gemini Configured: {GEMINI_API_CONFIGURED}, HF Configured: {HF_API_CONFIGURED}")
+        HF_API_CONFIGURED = False
+        print("WARNING: llm_clients.py - HF_TOKEN not found or empty.")
+    print(f"INFO: llm_clients.py - Init complete. Gemini Configured: {GEMINI_API_CONFIGURED}, HF Configured: {HF_API_CONFIGURED}")
 
+# --- Status Getter Functions ---
+def is_gemini_api_configured():
+    global GEMINI_API_CONFIGURED
+    return GEMINI_API_CONFIGURED
+
+def is_hf_api_configured():
+    global HF_API_CONFIGURED
+    return HF_API_CONFIGURED
 
-# This class remains useful for standardizing responses
-class LLMResponse:
-    def __init__(self, text=None, error=None, success=True, raw_response=None, model_id_used="unknown"):
-        self.text = text
-        self.error = error
-        self.success = success
-        self.raw_response = raw_response
-        self.model_id_used = model_id_used
-
-    def __str__(self):
-        if self.success:
-            return str(self.text) if self.text is not None else "" # Ensure text is string
-        return f"ERROR (Model: {self.model_id_used}): {self.error}"
+# ... (LLMResponse class and call_huggingface_api function remain the same as the last full version) ...
+class LLMResponse: # Make sure this is defined
+    def __init__(self, text=None, error=None, success=True, raw_response=None, model_id_used="unknown"):
+        self.text, self.error, self.success, self.raw_response, self.model_id_used = text, error, success, raw_response, model_id_used
+    def __str__(self): return str(self.text) if self.success and self.text is not None else f"ERROR (Model: {self.model_id_used}): {self.error}"
 
 def call_huggingface_api(prompt_text, model_id, temperature=0.7, max_new_tokens=512, system_prompt_text=None):
-    print(f"DEBUG: llm_clients.py - call_huggingface_api attempt for model: {model_id}")
-    if not HF_API_CONFIGURED or not hf_inference_client:
-        error_msg = "Hugging Face API not configured (HF_TOKEN missing, client init failed, or token invalid)."
+    print(f"DEBUG: llm_clients.py - call_huggingface_api for model: {model_id}")
+    if not is_hf_api_configured() or not hf_inference_client: # Use getter
+        error_msg = "HF API not configured."
         print(f"ERROR: llm_clients.py - {error_msg}")
         return LLMResponse(error=error_msg, success=False, model_id_used=model_id)
-
-    full_prompt = prompt_text
-    if system_prompt_text:
-        full_prompt = f"<s>[INST] <<SYS>>\n{system_prompt_text}\n<</SYS>>\n\n{prompt_text} [/INST]" # Llama-style
-
+    full_prompt = f"<s>[INST] <<SYS>>\n{system_prompt_text}\n<</SYS>>\n\n{prompt_text} [/INST]" if system_prompt_text else prompt_text
     try:
-        print(f" HF API Call - Prompt (first 100 chars): {full_prompt[:100]}...")
-        use_sample = temperature > 0.001
-        raw_response = hf_inference_client.text_generation(
-            full_prompt, model=model_id, max_new_tokens=max_new_tokens,
-            temperature=temperature if use_sample else None,
-            do_sample=use_sample,
-            stream=False
-        )
-        print(f" HF API Call - Success for model: {model_id}. Response (first 100 chars): {str(raw_response)[:100]}...")
+        print(f" HF API Call - Prompt (first 100): {full_prompt[:100]}...")
+        use_sample = temperature > 0.001
+        raw_response = hf_inference_client.text_generation(full_prompt, model=model_id, max_new_tokens=max_new_tokens, temperature=temperature if use_sample else None, do_sample=use_sample, stream=False)
+        print(f" HF API Call - Success for {model_id}. Response (first 100): {str(raw_response)[:100]}...")
         return LLMResponse(text=raw_response, raw_response=raw_response, model_id_used=model_id)
     except Exception as e:
-        error_msg = f"HF API Error during text_generation ({model_id}): {type(e).__name__} - {str(e)}"
+        error_msg = f"HF API Error ({model_id}): {type(e).__name__} - {str(e)}"
         print(f"ERROR: llm_clients.py - {error_msg}")
         return LLMResponse(error=error_msg, success=False, raw_response=e, model_id_used=model_id)
 
-def call_gemini_api(prompt_text, model_id, temperature=0.7, max_new_tokens=768, system_prompt_text=None):
-    print(f"DEBUG: llm_clients.py - call_gemini_api attempt for model: {model_id}")
-    if not GEMINI_API_CONFIGURED:
-        error_msg = "Google Gemini API not configured (GOOGLE_API_KEY missing, config failed, or key invalid)."
+
+def call_gemini_api(prompt_text, model_id, temperature=0.7, max_new_tokens=1024, system_prompt_text=None): # Increased default max_tokens
+    print(f"DEBUG: llm_clients.py - call_gemini_api for model: {model_id}")
+    if not is_gemini_api_configured(): # Use getter
+        error_msg = "Google Gemini API not configured."
         print(f"ERROR: llm_clients.py - {error_msg}")
         return LLMResponse(error=error_msg, success=False, model_id_used=model_id)
-
     try:
-        # genai.GenerativeModel is the recommended way to get a model instance.
-        # system_instruction is preferred for newer models (like 1.5 series).
-        print(f" Gemini API Call - Getting model instance for: {model_id}")
-        model_instance = genai.GenerativeModel(
-            model_name=model_id,
-            system_instruction=system_prompt_text # Pass system prompt here
-        )
-
-        generation_config = genai.types.GenerationConfig(
-            temperature=temperature,
-            max_output_tokens=max_new_tokens
-        )
-
-        print(f" Gemini API Call - Prompt (first 100 chars): {prompt_text[:100]}...")
-        if system_prompt_text: print(f" Gemini API Call - System Prompt (first 100 chars): {system_prompt_text[:100]}...")
-
-        raw_response = model_instance.generate_content(
-            prompt_text, # User prompt directly if system_instruction is used
-            generation_config=generation_config,
-            stream=False
-        )
-        print(f" Gemini API Call - Raw response received for model: {model_id}. Prompt feedback: {raw_response.prompt_feedback}, Candidates: {'Yes' if raw_response.candidates else 'No'}")
-
+        print(f" Gemini API Call - Getting instance for: {model_id}")
+        model_instance = genai.GenerativeModel(model_name=model_id, system_instruction=system_prompt_text)
+        generation_config = genai.types.GenerationConfig(temperature=temperature, max_output_tokens=max_new_tokens)
+        print(f" Gemini API Call - User Prompt (first 100): {prompt_text[:100]}...")
+        if system_prompt_text: print(f" Gemini API Call - System Prompt (first 100): {system_prompt_text[:100]}...")
+        raw_response = model_instance.generate_content(prompt_text, generation_config=generation_config, stream=False)
+        print(f" Gemini API Call - Raw response for {model_id}. Feedback: {raw_response.prompt_feedback}, Candidates: {'Yes' if raw_response.candidates else 'No'}")
 
         if raw_response.prompt_feedback and raw_response.prompt_feedback.block_reason:
             reason = raw_response.prompt_feedback.block_reason_message or raw_response.prompt_feedback.block_reason
-            error_msg = f"Gemini API: Your prompt was blocked. Reason: {reason}. Try rephrasing."
+            error_msg = f"Gemini API: Prompt blocked. Reason: {reason}."
             print(f"WARNING: llm_clients.py - {error_msg}")
             return LLMResponse(error=error_msg, success=False, raw_response=raw_response, model_id_used=model_id)
-
         if not raw_response.candidates:
-            error_msg = "Gemini API: No candidates returned in response. This often indicates the prompt was blocked or an internal error occurred before generation."
-            if raw_response.prompt_feedback: error_msg += f" Prompt Feedback: {raw_response.prompt_feedback}"
+            error_msg = "Gemini API: No candidates in response (often due to blocking)."
+            if raw_response.prompt_feedback: error_msg += f" Feedback: {raw_response.prompt_feedback}"
             print(f"WARNING: llm_clients.py - {error_msg}")
             return LLMResponse(error=error_msg, success=False, raw_response=raw_response, model_id_used=model_id)
 
         candidate = raw_response.candidates[0]
         if not candidate.content or not candidate.content.parts:
             finish_reason = str(candidate.finish_reason if candidate.finish_reason else "UNKNOWN").upper()
-            error_msg = f"Gemini API: Response generation stopped or yielded no content parts. Finish Reason: {finish_reason}."
-            if finish_reason == "SAFETY": error_msg += " Likely due to safety filters."
-            elif finish_reason == "RECITATION": error_msg += " Likely due to recitation policy."
-            elif finish_reason == "MAX_TOKENS": error_msg += " Consider increasing max_new_tokens if content seems truncated."
-
+            error_msg = f"Gemini API: No content parts. Finish Reason: {finish_reason}."
+            if finish_reason == "SAFETY": error_msg += " Likely safety filters."
             print(f"WARNING: llm_clients.py - {error_msg}")
-            # Attempt to extract partial text if MAX_TOKENS or other non-error finish reasons
-            partial_text = ""
-            if candidate.content and candidate.content.parts and hasattr(candidate.content.parts[0], 'text'):
-                partial_text = candidate.content.parts[0].text
-
-            if partial_text and finish_reason != "SAFETY" and finish_reason != "RECITATION" and finish_reason != "OTHER": # Only return partial if not a hard block
-                return LLMResponse(text=partial_text + f"\n[Note: Generation ended due to {finish_reason}]", raw_response=raw_response, model_id_used=model_id)
-            else: # If safety/recitation or truly no text, return as error
-                return LLMResponse(error=error_msg, success=False, raw_response=raw_response, model_id_used=model_id)
+            partial_text = candidate.content.parts[0].text if candidate.content and candidate.content.parts and hasattr(candidate.content.parts[0], 'text') else ""
+            return LLMResponse(text=partial_text + f"\n[Note: Generation ended: {finish_reason}]" if partial_text else None, error=error_msg if not partial_text else None, success=bool(partial_text), raw_response=raw_response, model_id_used=model_id)
 
         response_text = candidate.content.parts[0].text
-        print(f" Gemini API Call - Success for model: {model_id}. Response text (first 100 chars): {response_text[:100]}...")
+        print(f" Gemini API Call - Success for {model_id}. Response (first 100): {response_text[:100]}...")
         return LLMResponse(text=response_text, raw_response=raw_response, model_id_used=model_id)
-
     except Exception as e:
-        error_msg = f"Gemini API Call Exception ({model_id}): {type(e).__name__} - {str(e)}"
-        # Specific error parsing from previous version is good, let's keep it.
-        if "API key not valid" in str(e) or "PERMISSION_DENIED" in str(e):
-            error_msg = f"Gemini API Error ({model_id}): API key invalid or permission denied. Check GOOGLE_API_KEY and ensure Gemini API is enabled in Google Cloud. Original: {str(e)}"
-        elif "Could not find model" in str(e) or "मॉडल नहीं मिला" in str(e):
-            error_msg = f"Gemini API Error ({model_id}): Model ID '{model_id}' not found or inaccessible with your key. Original: {str(e)}"
-        elif "User location is not supported" in str(e):
-            error_msg = f"Gemini API Error ({model_id}): User location not supported for this model/API. Original: {str(e)}"
-        elif "Quota exceeded" in str(e): # Check for "Quota" in the error message from Google
-            error_msg = f"Gemini API Error ({model_id}): API quota exceeded. Please check your Google Cloud quotas. Original: {str(e)}"
-
+        error_msg = f"Gemini API Exception ({model_id}): {type(e).__name__} - {str(e)}"
+        # ... (specific error parsing as before) ...
+        if "API key not valid" in str(e) or "PERMISSION_DENIED" in str(e): error_msg = f"Gemini API Error ({model_id}): API key invalid/permission denied. Check GOOGLE_API_KEY & Google Cloud. Original: {str(e)}"
+        elif "Could not find model" in str(e): error_msg = f"Gemini API Error ({model_id}): Model ID '{model_id}' not found/inaccessible. Original: {str(e)}"
+        elif "Quota exceeded" in str(e): error_msg = f"Gemini API Error ({model_id}): API quota exceeded. Original: {str(e)}"
        print(f"ERROR: llm_clients.py - {error_msg}")
        return LLMResponse(error=error_msg, success=False, raw_response=e, model_id_used=model_id)
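For context, here is a minimal sketch of how a caller such as app.py might consume this module after the change. The wiring below is an assumption for illustration, not part of this commit; the model ID is a placeholder. It also shows why the new getter functions matter: importing the flags directly would copy their values at import time and never observe updates made by initialize_all_clients().

# Hypothetical caller sketch (not part of this commit): initialize once at
# startup, then check status through the new getters rather than via
# "from core.llm_clients import GEMINI_API_CONFIGURED", which would bind
# the flag's value before initialize_all_clients() runs.
from core import llm_clients

llm_clients.initialize_all_clients()

if llm_clients.is_gemini_api_configured():
    resp = llm_clients.call_gemini_api(
        "Summarize this module in one sentence.",
        model_id="gemini-1.5-flash-latest",  # placeholder model ID
        temperature=0.3,
    )
    print(resp)  # LLMResponse.__str__ prints the text or a formatted error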
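And a hedged sketch of the LLMResponse contract in use (again illustrative only; both model IDs are placeholders): because failed calls return success=False instead of raising, a caller can chain providers without wrapping each call in try/except.

# Illustrative fallback chain built on the LLMResponse contract:
# branch on resp.success and fall through to the next backend.
from core import llm_clients

def generate_with_fallback(prompt: str) -> str:
    if llm_clients.is_gemini_api_configured():
        resp = llm_clients.call_gemini_api(prompt, model_id="gemini-1.5-flash-latest")
        if resp.success:
            return resp.text
    if llm_clients.is_hf_api_configured():
        resp = llm_clients.call_huggingface_api(
            prompt,
            model_id="mistralai/Mistral-7B-Instruct-v0.2",  # placeholder
            system_prompt_text="You are a concise assistant.",
        )
        if resp.success:
            return resp.text
    return "No LLM backend is configured or all calls failed."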