Spaces:
Running
Running
chore: update estimate tokens
Browse files
lightweight_embeddings/service.py
CHANGED
@@ -369,12 +369,13 @@ class EmbeddingsService:
|
|
369 |
}
|
370 |
|
371 |
def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
|
|
378 |
|
379 |
@staticmethod
|
380 |
def softmax(scores: np.ndarray) -> np.ndarray:
|
|
|
369 |
}
|
370 |
|
371 |
def estimate_tokens(self, input_data: Union[str, List[str]]) -> int:
    """
    Estimate the token count of the input using the model's tokenizer.

    The input is normalised via ``self._validate_text_input`` and tokenized
    with the text model selected by ``self.config.text_model_type``.

    Args:
        input_data: A single text or a list of texts.

    Returns:
        The total number of tokens across all texts. When the tokenizer
        output contains an ``attention_mask``, only positions flagged in
        the mask are counted; otherwise the length of every ``input_ids``
        row is summed.
    """
    texts = self._validate_text_input(input_data)
    model = self.text_models[self.config.text_model_type]
    tokenized = model.tokenize(texts)

    # NOTE(review): batch tokenizers commonly pad every row to the longest
    # sequence, so len(input_ids[i]) would count padding as tokens.
    # Presumably that is the case for these models -- confirm against the
    # model's tokenize() implementation. Counting the mask is equivalent
    # to counting ids when nothing is padded.
    if "attention_mask" in tokenized:
        return sum(
            int(flag)
            for row in tokenized["attention_mask"]
            for flag in row
        )

    return sum(len(ids) for ids in tokenized["input_ids"])
|
379 |
|
380 |
@staticmethod
|
381 |
def softmax(scores: np.ndarray) -> np.ndarray:
|