import os
from threading import Lock

from components.llm.common import LlmParams, LlmPredictParams
from components.llm.deepinfra_api import DeepInfraApi


class LlmApi:
    """Process-wide singleton that owns the configured :class:`DeepInfraApi` client.

    Configuration is read from environment variables on first instantiation:

    - ``LLM_API_URL``       — base URL of the LLM endpoint (default: DeepInfra).
    - ``DEEPINFRA_API_KEY`` — API key (default: empty string).
    - ``LLM_NAME``          — model identifier.
    - ``TOKENIZER_NAME``    — tokenizer identifier.

    Creation is guarded by a class-level lock, so concurrent first calls from
    multiple threads produce exactly one initialized instance.
    """

    _instance = None  # the singleton instance, or None before first successful init
    _lock = Lock()    # serializes singleton construction across threads

    def __new__(cls) -> "LlmApi":
        """Return the singleton instance, creating and initializing it on first call.

        The new instance is assigned to ``cls._instance`` only AFTER
        ``_initialize()`` succeeds. This way, if initialization raises
        (e.g. bad configuration), the singleton slot stays ``None`` and a
        later call retries instead of returning a half-initialized object.
        """
        with cls._lock:
            if cls._instance is None:
                instance = super(LlmApi, cls).__new__(cls)
                instance._initialize()
                # Publish only on success — see docstring.
                cls._instance = instance
        return cls._instance

    def _initialize(self) -> None:
        """Read configuration from the environment and build the API client.

        Called exactly once, under ``_lock``, from ``__new__``.
        """
        llm_api_url = os.getenv("LLM_API_URL", "https://api.deepinfra.com")
        llm_api_key = os.getenv("DEEPINFRA_API_KEY", "")
        llm_name = os.getenv("LLM_NAME", "meta-llama/Llama-3.3-70B-Instruct-Turbo")
        tokenizer_name = os.getenv("TOKENIZER_NAME", "unsloth/Llama-3.3-70B-Instruct")

        default_llm_params = LlmParams(
            url=llm_api_url,
            api_key=llm_api_key,
            model=llm_name,
            tokenizer=tokenizer_name,
            context_length=130000,
            predict_params=LlmPredictParams(
                temperature=0.15,
                top_p=0.95,
                min_p=0.05,
                seed=42,
                repetition_penalty=1.2,
                presence_penalty=1.1,
                n_predict=6000,
            ),
        )
        # The client is created once and shared by every caller of get_api().
        self.api = DeepInfraApi(default_llm_params)

    def get_api(self) -> DeepInfraApi:
        """Return the shared :class:`DeepInfraApi` client."""
        return self.api