# algoforge_prime/core/llm_clients.py
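"""Client helpers for AlgoForge Prime's LLM backends.

Configures the Google Gemini and Hugging Face Inference APIs from environment
variables (GOOGLE_API_KEY, HF_TOKEN) and exposes call wrappers that return a
uniform LLMResponse object instead of raising on failure.
"""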
import os
import google.generativeai as genai
from huggingface_hub import InferenceClient
import time
# --- Configuration ---
GOOGLE_API_KEY = None
HF_TOKEN = None
GEMINI_API_CONFIGURED = False
HF_API_CONFIGURED = False
hf_inference_client = None
def initialize_all_clients():
    global GOOGLE_API_KEY, HF_TOKEN, GEMINI_API_CONFIGURED, HF_API_CONFIGURED, hf_inference_client
    print("INFO: llm_clients.py - Attempting to initialize all API clients...")

    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    if GOOGLE_API_KEY and GOOGLE_API_KEY.strip():
        print("INFO: llm_clients.py - GOOGLE_API_KEY found.")
        try:
            genai.configure(api_key=GOOGLE_API_KEY)
            GEMINI_API_CONFIGURED = True
            print("SUCCESS: llm_clients.py - Google Gemini API configured.")
        except Exception as e:
            GEMINI_API_CONFIGURED = False
            print(f"ERROR: llm_clients.py - Failed to configure Google Gemini API: {type(e).__name__}: {e}")
    else:
        GEMINI_API_CONFIGURED = False
        print("WARNING: llm_clients.py - GOOGLE_API_KEY not found or empty.")

    HF_TOKEN = os.getenv("HF_TOKEN")
    if HF_TOKEN and HF_TOKEN.strip():
        print("INFO: llm_clients.py - HF_TOKEN found.")
        try:
            hf_inference_client = InferenceClient(token=HF_TOKEN)
            HF_API_CONFIGURED = True
            print("SUCCESS: llm_clients.py - Hugging Face InferenceClient initialized.")
        except Exception as e:
            HF_API_CONFIGURED = False
            print(f"ERROR: llm_clients.py - Failed to initialize HF InferenceClient: {type(e).__name__}: {e}")
            hf_inference_client = None
    else:
        HF_API_CONFIGURED = False
        print("WARNING: llm_clients.py - HF_TOKEN not found or empty.")

    print(f"INFO: llm_clients.py - Init complete. Gemini Configured: {GEMINI_API_CONFIGURED}, HF Configured: {HF_API_CONFIGURED}")
# --- Status Getter Functions ---
def is_gemini_api_configured():
    return GEMINI_API_CONFIGURED

def is_hf_api_configured():
    return HF_API_CONFIGURED
# ... (LLMResponse class and call_huggingface_api function remain the same as the last full version) ...
class LLMResponse:  # Make sure this is defined
    def __init__(self, text=None, error=None, success=True, raw_response=None, model_id_used="unknown"):
        self.text, self.error, self.success, self.raw_response, self.model_id_used = text, error, success, raw_response, model_id_used

    def __str__(self):
        return str(self.text) if self.success and self.text is not None else f"ERROR (Model: {self.model_id_used}): {self.error}"
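# NOTE: The <s>[INST] ... <<SYS>> wrapper built below follows the Llama-2 / Mistral
# instruct chat template; models that expect a different prompt template may need
# their own formatting.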
def call_huggingface_api(prompt_text, model_id, temperature=0.7, max_new_tokens=512, system_prompt_text=None):
    print(f"DEBUG: llm_clients.py - call_huggingface_api for model: {model_id}")
    if not is_hf_api_configured() or not hf_inference_client:  # Use getter
        error_msg = "HF API not configured."
        print(f"ERROR: llm_clients.py - {error_msg}")
        return LLMResponse(error=error_msg, success=False, model_id_used=model_id)

    full_prompt = f"<s>[INST] <<SYS>>\n{system_prompt_text}\n<</SYS>>\n\n{prompt_text} [/INST]" if system_prompt_text else prompt_text
    try:
        print(f"  HF API Call - Prompt (first 100): {full_prompt[:100]}...")
        use_sample = temperature > 0.001
        raw_response = hf_inference_client.text_generation(
            full_prompt,
            model=model_id,
            max_new_tokens=max_new_tokens,
            temperature=temperature if use_sample else None,
            do_sample=use_sample,
            stream=False,
        )
        print(f"  HF API Call - Success for {model_id}. Response (first 100): {str(raw_response)[:100]}...")
        return LLMResponse(text=raw_response, raw_response=raw_response, model_id_used=model_id)
    except Exception as e:
        error_msg = f"HF API Error ({model_id}): {type(e).__name__} - {str(e)}"
        print(f"ERROR: llm_clients.py - {error_msg}")
        return LLMResponse(error=error_msg, success=False, raw_response=e, model_id_used=model_id)
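# NOTE: Passing system_instruction to genai.GenerativeModel requires a reasonably
# recent google-generativeai release; older versions reject the keyword argument.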
def call_gemini_api(prompt_text, model_id, temperature=0.7, max_new_tokens=1024, system_prompt_text=None):  # Increased default max_tokens
    print(f"DEBUG: llm_clients.py - call_gemini_api for model: {model_id}")
    if not is_gemini_api_configured():  # Use getter
        error_msg = "Google Gemini API not configured."
        print(f"ERROR: llm_clients.py - {error_msg}")
        return LLMResponse(error=error_msg, success=False, model_id_used=model_id)
    try:
        print(f"  Gemini API Call - Getting instance for: {model_id}")
        model_instance = genai.GenerativeModel(model_name=model_id, system_instruction=system_prompt_text)
        generation_config = genai.types.GenerationConfig(temperature=temperature, max_output_tokens=max_new_tokens)
        print(f"  Gemini API Call - User Prompt (first 100): {prompt_text[:100]}...")
        if system_prompt_text:
            print(f"  Gemini API Call - System Prompt (first 100): {system_prompt_text[:100]}...")
        raw_response = model_instance.generate_content(prompt_text, generation_config=generation_config, stream=False)
        print(f"  Gemini API Call - Raw response for {model_id}. Feedback: {raw_response.prompt_feedback}, Candidates: {'Yes' if raw_response.candidates else 'No'}")

        if raw_response.prompt_feedback and raw_response.prompt_feedback.block_reason:
            reason = raw_response.prompt_feedback.block_reason_message or raw_response.prompt_feedback.block_reason
            error_msg = f"Gemini API: Prompt blocked. Reason: {reason}."
            print(f"WARNING: llm_clients.py - {error_msg}")
            return LLMResponse(error=error_msg, success=False, raw_response=raw_response, model_id_used=model_id)

        if not raw_response.candidates:
            error_msg = "Gemini API: No candidates in response (often due to blocking)."
            if raw_response.prompt_feedback:
                error_msg += f" Feedback: {raw_response.prompt_feedback}"
            print(f"WARNING: llm_clients.py - {error_msg}")
            return LLMResponse(error=error_msg, success=False, raw_response=raw_response, model_id_used=model_id)

        candidate = raw_response.candidates[0]
        if not candidate.content or not candidate.content.parts:
            finish_reason = str(candidate.finish_reason if candidate.finish_reason else "UNKNOWN").upper()
            error_msg = f"Gemini API: No content parts. Finish Reason: {finish_reason}."
            if finish_reason == "SAFETY":
                error_msg += " Likely safety filters."
            print(f"WARNING: llm_clients.py - {error_msg}")
            partial_text = candidate.content.parts[0].text if candidate.content and candidate.content.parts and hasattr(candidate.content.parts[0], 'text') else ""
            return LLMResponse(
                text=(partial_text + f"\n[Note: Generation ended: {finish_reason}]") if partial_text else None,
                error=error_msg if not partial_text else None,
                success=bool(partial_text),
                raw_response=raw_response,
                model_id_used=model_id,
            )

        response_text = candidate.content.parts[0].text
        print(f"  Gemini API Call - Success for {model_id}. Response (first 100): {response_text[:100]}...")
        return LLMResponse(text=response_text, raw_response=raw_response, model_id_used=model_id)
    except Exception as e:
        error_msg = f"Gemini API Exception ({model_id}): {type(e).__name__} - {str(e)}"
        # ... (specific error parsing as before) ...
        if "API key not valid" in str(e) or "PERMISSION_DENIED" in str(e):
            error_msg = f"Gemini API Error ({model_id}): API key invalid/permission denied. Check GOOGLE_API_KEY & Google Cloud. Original: {str(e)}"
        elif "Could not find model" in str(e):
            error_msg = f"Gemini API Error ({model_id}): Model ID '{model_id}' not found/inaccessible. Original: {str(e)}"
        elif "Quota exceeded" in str(e):
            error_msg = f"Gemini API Error ({model_id}): API quota exceeded. Original: {str(e)}"
        print(f"ERROR: llm_clients.py - {error_msg}")
        return LLMResponse(error=error_msg, success=False, raw_response=e, model_id_used=model_id)
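
# --- Illustrative usage sketch (not part of the module API above) ---
# A minimal smoke test, assuming valid keys in GOOGLE_API_KEY / HF_TOKEN. The model
# IDs below are placeholders/assumptions; swap in whichever models your keys can
# actually access.
if __name__ == "__main__":
    initialize_all_clients()

    if is_gemini_api_configured():
        gemini_resp = call_gemini_api(
            "Summarize binary search in one sentence.",
            model_id="gemini-1.5-flash-latest",  # assumed/example model ID
            temperature=0.2,
            system_prompt_text="You are a concise coding assistant.",
        )
        print("Gemini result:", gemini_resp)

    if is_hf_api_configured():
        hf_resp = call_huggingface_api(
            "Summarize binary search in one sentence.",
            model_id="mistralai/Mistral-7B-Instruct-v0.2",  # assumed/example model ID
            temperature=0.2,
        )
        print("HF result:", hf_resp)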