Update app.py
app.py
CHANGED
@@ -179,7 +179,8 @@ class BaseGenerator(ABC):
         self.cache = ResponseCache(cache_size)
         self.batch_processor = BatchProcessor(max_batch_size)
         self.health_check = HealthCheck()
-
+        self.tokenizer = self.model_manager.tokenizers[model_name]
+        #self.tokenizer = self.load_tokenizer(llama_model_name) # Add this line to initialize the tokenizer
         self.default_config = default_generation_config or GenerationConfig()
         self.model_config = model_config or ModelConfig()
 
@@ -411,7 +412,9 @@ class LlamaGenerator(BaseGenerator):
         default_generation_config: Optional[GenerationConfig] = None,
         model_config: Optional[ModelConfig] = None,
         cache_size: int = 1000,
-        max_batch_size: int = 32
+        max_batch_size: int = 32,
+        # self.tokenizer = self.load_tokenizer(llama_model_name) # Add this line to initialize the tokenizer
+
     ):
         super().__init__(
             llama_model_name,