Update app.py
app.py CHANGED
@@ -179,7 +179,7 @@ class BaseGenerator(ABC):
         self.cache = ResponseCache(cache_size)
         self.batch_processor = BatchProcessor(max_batch_size)
         self.health_check = HealthCheck()
-
+        # self.tokenizer = self.model_manager.tokenizers[model_name]
         #self.tokenizer = self.load_tokenizer(llama_model_name) # Add this line to initialize the tokenizer
         self.default_config = default_generation_config or GenerationConfig()
         self.model_config = model_config or ModelConfig()
@@ -413,9 +413,24 @@ class LlamaGenerator(BaseGenerator):
         model_config: Optional[ModelConfig] = None,
         cache_size: int = 1000,
         max_batch_size: int = 32,
+        self.tokenizer = self.load_tokenizer(llama_model_name)
         # self.tokenizer = self.load_tokenizer(llama_model_name) # Add this line to initialize the tokenizer
 
     ):
+
+    #self.tokenizer = self.load_tokenizer(llama_model_name) # Add this line to initialize the tokenizer
+
+    def load_model(self, model_name: str):
+        # Code to load your model, e.g., Hugging Face's transformers library
+        from transformers import AutoModelForCausalLM
+        return AutoModelForCausalLM.from_pretrained(model_name)
+
+    def load_tokenizer(self, model_name: str):
+        # Load the tokenizer associated with the model
+        from transformers import AutoTokenizer
+        return AutoTokenizer.from_pretrained(model_name)
+
+
         super().__init__(
             llama_model_name,
             device,
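Note: as committed, this change does not parse. The new statement self.tokenizer = self.load_tokenizer(llama_model_name) sits inside the __init__ parameter list (between max_batch_size: int = 32, and the closing parenthesis), and the load_model/load_tokenizer helpers are inserted inside __init__ ahead of the super().__init__(...) call. Below is a minimal sketch of the presumably intended structure. It uses only the names visible in this diff; BaseGenerator, ModelConfig, and the rest of app.py are assumed to exist elsewhere, and the parts of the signature and super().__init__ call that the diff truncates are marked rather than guessed.

from typing import Optional

from transformers import AutoModelForCausalLM, AutoTokenizer

class LlamaGenerator(BaseGenerator):
    def __init__(
        self,
        llama_model_name: str,
        device: str,
        # ... earlier parameters from the original signature, truncated in this diff ...
        model_config: Optional["ModelConfig"] = None,
        cache_size: int = 1000,
        max_batch_size: int = 32,
    ):
        super().__init__(
            llama_model_name,
            device,
            # ... remaining arguments from the original call, truncated in this diff ...
        )
        # Initialize the tokenizer in the body of __init__, after the base
        # class is set up, not in the parameter list.
        self.tokenizer = self.load_tokenizer(llama_model_name)

    def load_model(self, model_name: str):
        # Load the model via Hugging Face transformers.
        return AutoModelForCausalLM.from_pretrained(model_name)

    def load_tokenizer(self, model_name: str):
        # Load the tokenizer associated with the model.
        return AutoTokenizer.from_pretrained(model_name)

Alternatively, the commented-out hint in the first hunk, # self.tokenizer = self.model_manager.tokenizers[model_name], suggests reusing a tokenizer the base class's model manager has already loaded, which would avoid loading it a second time.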