File size: 1,531 Bytes
d8e2b36
 
f42cab1
86b351a
f42cab1
 
d8e2b36
 
86b351a
21eb680
86b351a
 
 
21eb680
d8e2b36
86b351a
 
d8e2b36
21eb680
 
86b351a
 
d8e2b36
 
 
 
 
f42cab1
d8e2b36
 
86b351a
 
d8e2b36
f42cab1
 
86b351a
f42cab1
4310b90
 
 
f42cab1
 
4310b90
2999669
4310b90
f42cab1
 
 
4310b90
f42cab1
86b351a
d8e2b36
 
 
86b351a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""Utility functions for working with the language model."""

import asyncio
import logging

from google.api_core.exceptions import DeadlineExceeded
from langchain_google_genai import ChatGoogleGenerativeAI

from config import settings
from services.google import ApiKeyPool

logger = logging.getLogger(__name__)

_pool = ApiKeyPool()
MODEL_NAME = "gemini-2.5-flash-preview-05-20"


def _get_api_key() -> str:
    """Fetch the next key from the shared pool (round-robin, thread-safe per pool)."""
    key = _pool.get_key_sync()
    return key


def create_llm(
    temperature: float = settings.temperature,
    top_p: float = settings.top_p,
) -> ChatGoogleGenerativeAI:
    """Build the default Gemini chat model.

    Args:
        temperature: Sampling temperature; defaults to the configured value.
        top_p: Nucleus-sampling cutoff; defaults to the configured value.

    Returns:
        A ``ChatGoogleGenerativeAI`` client bound to a key from the shared pool.
    """
    return ChatGoogleGenerativeAI(
        model=MODEL_NAME,
        google_api_key=_get_api_key(),
        temperature=temperature,
        top_p=top_p,
        thinking_budget=1024,  # fixed reasoning-token budget for this model
        timeout=settings.request_timeout,
        max_retries=3,
    )
    
    
def create_light_llm(
    temperature: float = settings.temperature,
    top_p: float = settings.top_p,
) -> ChatGoogleGenerativeAI:
    """Create a lightweight LLM instance backed by the gemini-2.0-flash model.

    Unlike :func:`create_llm`, no ``thinking_budget`` is set.  The request
    timeout is the same ``settings.request_timeout`` used by the main model
    (the previous docstring's claim of "a shorter timeout" did not match
    the code).

    Args:
        temperature: Sampling temperature; defaults to the configured value.
        top_p: Nucleus-sampling cutoff; defaults to the configured value.

    Returns:
        A ``ChatGoogleGenerativeAI`` client bound to a key from the shared pool.
    """
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=_get_api_key(),
        temperature=temperature,
        top_p=top_p,
        timeout=settings.request_timeout,
        max_retries=3,
    )
    return llm


def create_precise_llm() -> ChatGoogleGenerativeAI:
    """Build an LLM configured for deterministic output.

    Equivalent to ``create_llm(temperature=0, top_p=1)``.
    """
    deterministic = {"temperature": 0, "top_p": 1}
    return create_llm(**deterministic)