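# Page-header residue captured together with this file (presumably the
# Hugging Face Spaces status badge): "Spaces: Runtime error".
# It is not part of the program.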
import gradio as gr
from peft import PeftConfig, PeftModel
from transformers import (
    AutoModel,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
# Nominal accelerator name. The actual placement of the weights is decided by
# device_map="auto" in the from_pretrained call below.
device = "cuda"

# LoRA adapter repository; its PEFT config names the base checkpoint to load.
peft_model_id = "andreabac3/DanteLLM_instruct_7b-v0.2-boosted"
config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model in 8-bit. The quantization settings are passed through
# `quantization_config` because handing `load_in_8bit=True` directly to
# `from_pretrained` is deprecated in transformers.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

# Attach the adapter weights on top of the quantized base model.
model = PeftModel.from_pretrained(model, peft_model_id)

# The tokenizer (and its chat template) comes from the base checkpoint.
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Switch to inference mode (disables dropout and similar training behaviour).
model.eval()
def dante_eval(prompt):
    """Generate a model reply for a single user prompt.

    The prompt is wrapped as a one-turn chat, rendered with the tokenizer's
    chat template, and passed to ``model.generate`` with sampling enabled
    (``temperature=0.2``) and at most 300 new tokens.

    Args:
        prompt: Text typed by the user in the Gradio text box.

    Returns:
        The first decoded sequence produced by ``tokenizer.batch_decode``.
        ``generate`` is given the prompt tokens as ``input_ids`` and the
        decode call does not skip special tokens.
        NOTE(review): the returned text therefore presumably still contains
        the rendered prompt and template markers; confirm whether the UI
        should show only the newly generated part.
    """
    # Logged to stdout so requests are visible in the application logs.
    print(prompt)

    # Single-turn conversation. A multi-turn history would be a longer list
    # of {"role": "user" | "assistant", "content": ...} entries.
    messages = [
        {"role": "user", "content": prompt},
    ]

    encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt")

    # Send the inputs to wherever the model actually lives instead of a
    # hard-coded device name. The model is loaded once at module level in
    # 8-bit with device_map="auto", so it is already placed; the former
    # per-request `model.to(device)` was redundant, and transformers can
    # reject `.to()` on bitsandbytes-quantized models.
    model_inputs = encodeds.to(model.device)

    generated_ids = model.generate(
        input_ids=model_inputs,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.2,
    )
    decoded = tokenizer.batch_decode(generated_ids)

    # Sample answer noted by the original author (translated from Italian):
    # "The Earth is 384,400 kilometres (238,855 miles) from the Moon. The
    # distance varies slightly because of its elliptical orbit."
    return decoded[0]
# Minimal Gradio UI: one text input routed through dante_eval, one text output.
iface = gr.Interface(
    fn=dante_eval,
    inputs="text",
    outputs="text",
)

# Start the web server for the interface.
iface.launch()