Spaces:
Build error
Build error
from functools import lru_cache
from typing import Generator

from transformers import AutoTokenizer

from llm_engineering.settings import settings
def flatten(nested_list: list) -> list:
    """Concatenate the items of every inner iterable into one flat list.

    Exactly one level of nesting is removed, and items keep the order in
    which they appear in ``nested_list``.
    """
    flat: list = []
    for inner in nested_list:
        flat.extend(inner)
    return flat
def batch(list_: list, size: int) -> Generator[list, None, None]:
    """Yield consecutive slices of ``list_`` containing at most ``size`` items.

    Every chunk holds exactly ``size`` items except possibly the last one,
    which holds whatever remains. An empty ``list_`` yields nothing.

    Args:
        list_: The sequence to split; it is sliced, not modified.
        size: Maximum number of items per chunk; must be at least 1.

    Raises:
        ValueError: If ``size`` is less than 1. Because this is a generator
            function, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every item,
    # and a zero step fails with an unhelpful message, so reject both up front.
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")

    for start in range(0, len(list_), size):
        yield list_[start : start + size]
@lru_cache(maxsize=None)
def _load_tokenizer(model_id: str):
    """Load the tokenizer for ``model_id`` once and reuse it on later calls."""
    return AutoTokenizer.from_pretrained(model_id)


def compute_num_tokens(text: str) -> int:
    """Return how many tokens ``text`` encodes to, excluding special tokens.

    The tokenizer is the one identified by ``settings.HF_MODEL_ID``. It is
    loaded on first use and cached per model id, so repeated calls do not
    re-run ``AutoTokenizer.from_pretrained`` for every string.

    Args:
        text: The string to tokenize.

    Returns:
        The length of the token id sequence produced by ``tokenizer.encode``
        with ``add_special_tokens=False``.
    """
    # NOTE(review): the cached tokenizer is shared by all callers; confirm
    # this is acceptable if the function is used from multiple threads.
    tokenizer = _load_tokenizer(settings.HF_MODEL_ID)
    return len(tokenizer.encode(text, add_special_tokens=False))