# llm-token-probs / app.py
import torch
import streamlit as st
from transformers import AutoTokenizer, T5ForConditionalGeneration, GenerationConfig

st.title('How does the LLM choose its words?')
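
# Load FLAN-T5-small and its tokenizer; device_map="auto" lets accelerate
# decide where to place the weights.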
model_checkpoint = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = T5ForConditionalGeneration.from_pretrained(
    model_checkpoint,
    load_in_8bit=False,
    device_map="auto",
)

instruction = st.text_area('Write an instruction:')
max_tokens = st.number_input('Max output length: ', min_value=1, max_value=64, format='%i')

prompts = [
    f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction: {instruction}
### Response:"""
]

inputs = tokenizer(
    prompts[0],
    return_tensors="pt",
)
input_ids = inputs["input_ids"]  # .to("cuda") when running on a GPU

generation_config = GenerationConfig(
    do_sample=True,
    temperature=0.8,        # softens the logits before sampling (transformers default: 1.0)
    top_p=0.995,            # nucleus (top-p) filtering threshold (default: 1.0)
    top_k=100,              # keep only the 100 most likely tokens (default: 50)
    repetition_penalty=1.5,
    max_new_tokens=max_tokens,
)
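
# Note: with do_sample=True, transformers applies temperature, then top-k, then
# top-p filtering to the logits before sampling from the renormalized
# distribution; output_scores=True below stores these processed scores.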

if instruction:
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=torch.ones_like(input_ids),
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
        )

    output_text = tokenizer.decode(
        outputs['sequences'][0],  # .cuda()
        skip_special_tokens=False,
    ).strip()

    st.write(output_text)
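
    # --- Hedged sketch, not part of the original file: one way to surface the
    # per-token probabilities that output_scores=True makes available.
    # compute_transition_scores is a transformers helper (recent versions);
    # the st.table layout is an illustrative choice, not the original UI.
    transition_scores = model.compute_transition_scores(
        outputs.sequences, outputs.scores, normalize_logits=True
    )
    # sequences[0] starts with the decoder start token; each later position
    # lines up with one entry in transition_scores.
    generated_tokens = outputs.sequences[0][1:]
    # log-probabilities -> probabilities, as seen by the sampler after
    # temperature/top-k/top-p processing
    probabilities = torch.exp(transition_scores[0])
    st.table(
        {
            "token": [tokenizer.decode(tok) for tok in generated_tokens.tolist()],
            "probability": [f"{p:.3f}" for p in probabilities.tolist()],
        }
    )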