Spaces:
Running
Running
File size: 1,531 Bytes
d8e2b36 f42cab1 86b351a f42cab1 d8e2b36 86b351a 21eb680 86b351a 21eb680 d8e2b36 86b351a d8e2b36 21eb680 86b351a d8e2b36 f42cab1 d8e2b36 86b351a d8e2b36 f42cab1 86b351a f42cab1 4310b90 f42cab1 4310b90 2999669 4310b90 f42cab1 4310b90 f42cab1 86b351a d8e2b36 86b351a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
"""Utility functions for working with the language model."""
import asyncio
import logging
from google.api_core.exceptions import DeadlineExceeded
from langchain_google_genai import ChatGoogleGenerativeAI
from config import settings
from services.google import ApiKeyPool
logger = logging.getLogger(__name__)
_pool = ApiKeyPool()
MODEL_NAME = "gemini-2.5-flash-preview-05-20"
def _get_api_key() -> str:
    """Fetch the next Google API key from the shared pool.

    Selection is round-robin and thread-safe (delegated to the pool's
    synchronous accessor).
    """
    key = _pool.get_key_sync()
    return key
def create_llm(
    temperature: float = settings.temperature,
    top_p: float = settings.top_p,
) -> ChatGoogleGenerativeAI:
    """Create a standard LLM instance.

    Uses the module-wide ``MODEL_NAME`` with a fixed thinking budget and
    retry policy; an API key is drawn from the shared pool per call.
    Note: the parameter defaults are captured from ``settings`` at import
    time, not re-read on each call.
    """
    params = {
        "model": MODEL_NAME,
        "google_api_key": _get_api_key(),
        "temperature": temperature,
        "top_p": top_p,
        "thinking_budget": 1024,
        "timeout": settings.request_timeout,
        "max_retries": 3,
    }
    return ChatGoogleGenerativeAI(**params)
def create_light_llm(
    temperature: float = settings.temperature,
    top_p: float = settings.top_p,
) -> ChatGoogleGenerativeAI:
    """Create a light LLM instance with a shorter timeout.

    Uses the lighter ``gemini-2.0-flash`` model (no thinking budget) for
    cheaper/faster calls; otherwise mirrors :func:`create_llm`. An API key
    is drawn from the shared pool per call. Defaults are captured from
    ``settings`` at import time.
    """
    # NOTE(review): the docstring says "shorter timeout" but the same
    # settings.request_timeout as create_llm is used — confirm intent.
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=_get_api_key(),
        temperature=temperature,
        top_p=top_p,
        timeout=settings.request_timeout,
        max_retries=3,
    )
    return llm
def create_precise_llm() -> ChatGoogleGenerativeAI:
    """Return an LLM tuned for deterministic output.

    Simply delegates to :func:`create_llm` with sampling effectively
    disabled (temperature 0, top_p 1).
    """
    deterministic = {"temperature": 0, "top_p": 1}
    return create_llm(**deterministic)
|