import os
from threading import Lock

from components.llm.common import LlmParams, LlmPredictParams
from components.llm.deepinfra_api import DeepInfraApi


class LlmApi:
    """Thread-safe singleton holding a single shared DeepInfraApi client."""

    _instance = None
    _lock = Lock()

    def __new__(cls):
        # The lock is taken on every call, so concurrent first calls cannot
        # create two instances; later calls just return the cached one.
        with cls._lock:
            if cls._instance is None:
                cls._instance = super(LlmApi, cls).__new__(cls)
                cls._instance._initialize()
        return cls._instance

    def _initialize(self):
        # Connection settings come from the environment, with defaults for local use.
        LLM_API_URL = os.getenv("LLM_API_URL", "https://api.deepinfra.com")
        LLM_API_KEY = os.getenv("DEEPINFRA_API_KEY", "")
        LLM_NAME = os.getenv("LLM_NAME", "meta-llama/Llama-3.3-70B-Instruct-Turbo")
        TOKENIZER_NAME = os.getenv("TOKENIZER_NAME", "unsloth/Llama-3.3-70B-Instruct")

        default_llm_params = LlmParams(
            url=LLM_API_URL,
            api_key=LLM_API_KEY,
            model=LLM_NAME,
            tokenizer=TOKENIZER_NAME,
            context_length=130000,
            predict_params=LlmPredictParams(
                temperature=0.15, top_p=0.95, min_p=0.05, seed=42,
                repetition_penalty=1.2, presence_penalty=1.1, n_predict=6000
            )
        )
        self.api = DeepInfraApi(default_llm_params)

    def get_api(self):
        return self.api