Chris4K committed on
Commit
abaf9f1
·
verified ·
1 Parent(s): 93d9f11

Create services/model_service.py

Browse files
Files changed (1) hide show
  1. services/model_service.py +41 -0
services/model_service.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # services/model_service.py
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ from sentence_transformers import SentenceTransformer
4
+ import torch
5
+ from functools import lru_cache
6
+ from config.config import settings
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
class ModelService:
    """Singleton holder for the tokenizer, causal language model and embedder.

    ``__new__`` hands back the same object for every ``ModelService()`` call
    on a given class, and ``__init__`` loads the models only while that
    object is not yet marked as initialized. Model names and the target
    device are read from ``settings`` (``MODEL_NAME``, ``EMBEDDER_MODEL``,
    ``DEVICE``). No locking is performed here.
    """

    # The single shared instance; created lazily by ``__new__``.
    _instance = None

    def __new__(cls):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            # Set here because ``__init__`` runs on every ``ModelService()``
            # call and needs the flag to exist before it reads it.
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        """Load the models on first construction.

        Raises:
            Exception: whatever ``_load_models`` raised. The instance is left
                un-initialized in that case, so a later ``ModelService()``
                call retries the load instead of returning a half-built
                object.
        """
        if self._initialized:
            return
        self._load_models()
        # Only mark the singleton ready once every model loaded successfully.
        self._initialized = True

    def _load_models(self):
        """Load the tokenizer, the causal LM and the sentence embedder.

        When ``settings.DEVICE`` is ``"cuda"`` the language model is loaded
        with ``torch.float16`` and ``device_map="auto"``; otherwise it is
        loaded with ``torch.float32`` and no device map.

        Raises:
            Exception: any error raised while loading is logged with its
                traceback and re-raised unchanged.
        """
        try:
            use_cuda = settings.DEVICE == "cuda"
            self.tokenizer = AutoTokenizer.from_pretrained(settings.MODEL_NAME)
            self.model = AutoModelForCausalLM.from_pretrained(
                settings.MODEL_NAME,
                torch_dtype=torch.float16 if use_cuda else torch.float32,
                device_map="auto" if use_cuda else None,
            )
            self.embedder = SentenceTransformer(settings.EMBEDDER_MODEL)
        except Exception as e:
            # ``exception`` records the traceback alongside the message.
            logger.exception("Error loading models: %s", e)
            raise

    def get_models(self):
        """Return the loaded ``(tokenizer, model, embedder)`` triple."""
        return self.tokenizer, self.model, self.embedder
41
+