# algoforge_prime/core/llm_clients.py
import os
import google.generativeai as genai
from huggingface_hub import InferenceClient

# --- Configuration ---
GOOGLE_API_KEY = None
HF_TOKEN = None
GEMINI_API_CONFIGURED = False
HF_API_CONFIGURED = False
hf_inference_client = None

def initialize_all_clients():
    global GOOGLE_API_KEY, HF_TOKEN, GEMINI_API_CONFIGURED, HF_API_CONFIGURED, hf_inference_client
    print("INFO: llm_clients.py - Attempting to initialize all API clients...")
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    if GOOGLE_API_KEY and GOOGLE_API_KEY.strip():
        print("INFO: llm_clients.py - GOOGLE_API_KEY found.")
        try:
            genai.configure(api_key=GOOGLE_API_KEY)
            GEMINI_API_CONFIGURED = True
            print("SUCCESS: llm_clients.py - Google Gemini API configured.")
        except Exception as e:
            GEMINI_API_CONFIGURED = False
            print(f"ERROR: llm_clients.py - Failed to configure Google Gemini API: {type(e).__name__}: {e}")
    else:
        GEMINI_API_CONFIGURED = False
        print("WARNING: llm_clients.py - GOOGLE_API_KEY not found or empty.")

    HF_TOKEN = os.getenv("HF_TOKEN")
    if HF_TOKEN and HF_TOKEN.strip():
        print("INFO: llm_clients.py - HF_TOKEN found.")
        try:
            hf_inference_client = InferenceClient(token=HF_TOKEN)
            HF_API_CONFIGURED = True
            print("SUCCESS: llm_clients.py - Hugging Face InferenceClient initialized.")
        except Exception as e:
            HF_API_CONFIGURED = False
            print(f"ERROR: llm_clients.py - Failed to initialize HF InferenceClient: {type(e).__name__}: {e}")
            hf_inference_client = None
    else:
        HF_API_CONFIGURED = False
        print("WARNING: llm_clients.py - HF_TOKEN not found or empty.")
    print(f"INFO: llm_clients.py - Init complete. Gemini Configured: {GEMINI_API_CONFIGURED}, HF Configured: {HF_API_CONFIGURED}")

# --- Status Getter Functions ---
def is_gemini_api_configured():
    return GEMINI_API_CONFIGURED

def is_hf_api_configured():
    return HF_API_CONFIGURED

# --- Response Wrapper ---
class LLMResponse:
    def __init__(self, text=None, error=None, success=True, raw_response=None, model_id_used="unknown"):
        self.text = text
        self.error = error
        self.success = success
        self.raw_response = raw_response
        self.model_id_used = model_id_used

    def __str__(self):
        if self.success and self.text is not None:
            return str(self.text)
        return f"ERROR (Model: {self.model_id_used}): {self.error}"

def call_huggingface_api(prompt_text, model_id, temperature=0.7, max_new_tokens=512, system_prompt_text=None):
    print(f"DEBUG: llm_clients.py - call_huggingface_api for model: {model_id}")
    if not is_hf_api_configured() or not hf_inference_client: # Use getter
        error_msg = "HF API not configured."
        print(f"ERROR: llm_clients.py - {error_msg}")
        return LLMResponse(error=error_msg, success=False, model_id_used=model_id)
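    # NOTE: this hardcodes a Llama-2-style [INST]/<<SYS>> prompt template; models
    # that expect a different chat format will need different wrapping.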
    full_prompt = f"<s>[INST] <<SYS>>\n{system_prompt_text}\n<</SYS>>\n\n{prompt_text} [/INST]" if system_prompt_text else prompt_text
    try:
        print(f"  HF API Call - Prompt (first 100): {full_prompt[:100]}...")
        use_sample = temperature > 0.001
        raw_response = hf_inference_client.text_generation(full_prompt, model=model_id, max_new_tokens=max_new_tokens, temperature=temperature if use_sample else None, do_sample=use_sample, stream=False)
        print(f"  HF API Call - Success for {model_id}. Response (first 100): {str(raw_response)[:100]}...")
        return LLMResponse(text=raw_response, raw_response=raw_response, model_id_used=model_id)
    except Exception as e:
        error_msg = f"HF API Error ({model_id}): {type(e).__name__} - {str(e)}"
        print(f"ERROR: llm_clients.py - {error_msg}")
        return LLMResponse(error=error_msg, success=False, raw_response=e, model_id_used=model_id)


def call_gemini_api(prompt_text, model_id, temperature=0.7, max_new_tokens=1024, system_prompt_text=None):
    print(f"DEBUG: llm_clients.py - call_gemini_api for model: {model_id}")
    if not is_gemini_api_configured(): # Use getter
        error_msg = "Google Gemini API not configured."
        print(f"ERROR: llm_clients.py - {error_msg}")
        return LLMResponse(error=error_msg, success=False, model_id_used=model_id)
    try:
        print(f"  Gemini API Call - Getting instance for: {model_id}")
        model_instance = genai.GenerativeModel(model_name=model_id, system_instruction=system_prompt_text)
        generation_config = genai.types.GenerationConfig(temperature=temperature, max_output_tokens=max_new_tokens)
        print(f"  Gemini API Call - User Prompt (first 100): {prompt_text[:100]}...")
        if system_prompt_text: print(f"  Gemini API Call - System Prompt (first 100): {system_prompt_text[:100]}...")
        raw_response = model_instance.generate_content(prompt_text, generation_config=generation_config, stream=False)
        print(f"  Gemini API Call - Raw response for {model_id}. Feedback: {raw_response.prompt_feedback}, Candidates: {'Yes' if raw_response.candidates else 'No'}")

        if raw_response.prompt_feedback and raw_response.prompt_feedback.block_reason:
            reason = raw_response.prompt_feedback.block_reason_message or raw_response.prompt_feedback.block_reason
            error_msg = f"Gemini API: Prompt blocked. Reason: {reason}."
            print(f"WARNING: llm_clients.py - {error_msg}")
            return LLMResponse(error=error_msg, success=False, raw_response=raw_response, model_id_used=model_id)
        if not raw_response.candidates:
            error_msg = "Gemini API: No candidates in response (often due to blocking)."
            if raw_response.prompt_feedback: error_msg += f" Feedback: {raw_response.prompt_feedback}"
            print(f"WARNING: llm_clients.py - {error_msg}")
            return LLMResponse(error=error_msg, success=False, raw_response=raw_response, model_id_used=model_id)

        candidate = raw_response.candidates[0]
        if not candidate.content or not candidate.content.parts:
            finish_reason = str(candidate.finish_reason if candidate.finish_reason else "UNKNOWN").upper()
            error_msg = f"Gemini API: No content parts. Finish Reason: {finish_reason}."
            if finish_reason == "SAFETY": error_msg += " Likely safety filters."
            print(f"WARNING: llm_clients.py - {error_msg}")
            # This branch is only reached when content/parts are empty, so there is
            # no partial text to salvage; report the failure directly.
            return LLMResponse(error=error_msg, success=False, raw_response=raw_response, model_id_used=model_id)
        
        response_text = candidate.content.parts[0].text
        print(f"  Gemini API Call - Success for {model_id}. Response (first 100): {response_text[:100]}...")
        return LLMResponse(text=response_text, raw_response=raw_response, model_id_used=model_id)
    except Exception as e:
        error_msg = f"Gemini API Exception ({model_id}): {type(e).__name__} - {str(e)}"
        # Map common failure modes to clearer error messages.
        if "API key not valid" in str(e) or "PERMISSION_DENIED" in str(e): error_msg = f"Gemini API Error ({model_id}): API key invalid/permission denied. Check GOOGLE_API_KEY & Google Cloud. Original: {str(e)}"
        elif "Could not find model" in str(e) : error_msg = f"Gemini API Error ({model_id}): Model ID '{model_id}' not found/inaccessible. Original: {str(e)}"
        elif "Quota exceeded" in str(e): error_msg = f"Gemini API Error ({model_id}): API quota exceeded. Original: {str(e)}"
        print(f"ERROR: llm_clients.py - {error_msg}")
        return LLMResponse(error=error_msg, success=False, raw_response=e, model_id_used=model_id)
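
# --- Usage sketch (illustrative; not part of the original module) ---
# A minimal example of how a caller might exercise this module: initialize the
# clients, then dispatch to whichever backend is configured. The model IDs below
# are assumptions for demonstration; substitute models your keys can access.
if __name__ == "__main__":
    initialize_all_clients()
    prompt = "Write a one-line docstring for a binary search function."
    if is_gemini_api_configured():
        response = call_gemini_api(prompt, model_id="gemini-1.5-flash-latest")
        print(response)
    elif is_hf_api_configured():
        response = call_huggingface_api(prompt, model_id="mistralai/Mistral-7B-Instruct-v0.2")
        print(response)
    else:
        print("No LLM backend configured; set GOOGLE_API_KEY or HF_TOKEN.")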