# OpenELM_3B_Demo / app.py
# JERNGOC's picture
# Update app.py
# d12b0d2 verified
# raw
# history blame
# 725 Bytes
# Module-level tokenizer setup: load the tokenizer once at import time.
# If it defines no padding token, fall back to the end-of-sequence token
# (and its id) so that padding has a token to use.
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id
@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.4,
) -> Iterator[str]:
    """Tokenize ``message`` with the module-level tokenizer and generate a reply.

    Only the tokenization step is shown in this snippet; the remainder of
    the body is elided. In the visible portion, ``message`` is wrapped in a
    one-element list and encoded with ``return_tensors="pt"``; the other
    parameters are not referenced.

    Args:
        message: The user's input text.
        chat_history: Earlier ``(str, str)`` pairs; not used in the visible
            portion.
        max_new_tokens: Not used in the visible portion — presumably a
            generation limit consumed by the elided code; confirm there.
        temperature: Not used in the visible portion.
        top_p: Not used in the visible portion.
        top_k: Not used in the visible portion.
        repetition_penalty: Not used in the visible portion.

    Returns:
        Annotated as an iterator of strings; the code producing the values
        is in the elided part of the function.
    """
    # Reading module-level names works without `global`; the declaration
    # only matters if the elided code rebinds `tokenizer` or `model`.
    global tokenizer, model
    input_ids = tokenizer([message], return_tensors="pt").input_ids
    # ... rest of the function ...
# The rest of your code remains the same