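"""Streamlit demo for lagy/carballo-1.3b-finetuned, a Galician
instruction-following model. The app takes an instruction and optional
context, builds an Alpaca-style prompt in Galician, and streams the
model's answer token by token."""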
import streamlit as st
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
from transformers import TextIteratorStreamer
# Generation settings.
temperature = 1.0
repetition_penalty = 1.0

# Flags kept from experimentation; not currently used by the app.
perform_fewshot = False
few_shot_examples = 6
use_gpu = True
batch_size = 1
def generate_and_tokenize_instruction(sample, return_tensors=None):
    # Build the Galician Alpaca-style prompt the model was fine-tuned on,
    # then tokenize it. Uses the module-level `tokenizer` loaded below.
    prompt = f"""Abaixo encontrase unha instrución que describe unha tarefa, xunto cunha entrada que proporciona máis contexto.
Escribe unha resposta que responda adecuadamente a entrada.
### Instrución:
{sample['instruction']}
### Entrada:
{sample['input']}
### Resposta:
{sample['output']}\n"""
    return tokenizer(prompt, return_tensors=return_tensors)
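# For illustration, generate_and_tokenize_instruction({'instruction': 'Traduce ao inglés',
# 'input': 'Ola mundo', 'output': ''}) tokenizes a prompt of the form:
#
#   Abaixo encontrase unha instrución que describe unha tarefa, ...
#   ### Instrución:
#   Traduce ao inglés
#   ### Entrada:
#   Ola mundo
#   ### Resposta:
#
# The empty 'output' leaves the "### Resposta:" section open for the model to complete.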
def generate(model, tokenizer, instruction, context, streamer):
    # Tokenize the prompt with an empty output so the model completes the answer.
    prompt = generate_and_tokenize_instruction(
        {'instruction': instruction, 'input': context, 'output': ''},
        return_tensors="pt",
    )
    input_ids = prompt["input_ids"]  # .to('cuda:0') when running on GPU
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(do_sample=True),
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=max_length,  # module-level value set by the UI below
        temperature=temperature,
        top_k=10,
        repetition_penalty=repetition_penalty,
        eos_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # The decoded sequence contains the full prompt; the answer is the text
    # after the third "###" marker ("### Resposta:"), up to the end-of-text token.
    output = tokenizer.decode(generation_output.sequences[0])
    return output.split("###")[3].split("<|endoftext|>", 1)[0]
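# A minimal sketch of GPU inference, assuming a CUDA device is available
# (the `use_gpu` flag above is not wired up to this logic):
#
#   model = model.to("cuda:0")
#   input_ids = prompt["input_ids"].to("cuda:0")
#
# Generation then runs on the GPU; the streamed text is unchanged.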
@st.cache_resource
def get_model():
    # Load the fine-tuned model once; st.cache_resource reuses it across reruns.
    return AutoModelForCausalLM.from_pretrained("lagy/carballo-1.3b-finetuned")

@st.cache_resource
def get_tokenizer():
    # Load the matching tokenizer, cached the same way.
    return AutoTokenizer.from_pretrained("lagy/carballo-1.3b-finetuned")
st.write(" ")
instruction = st.text_area('Instrución')
context = st.text_area('Contexto')
max_length = st.number_input('Max generation length', value=120)

model = get_model()
tokenizer = get_tokenizer()
model.eval()  # inference only; disables dropout
if 'reply' not in st.session_state:
    st.session_state['reply'] = ''
if st.button('Generate'):
    # skip_prompt=True makes the streamer yield only newly generated text,
    # so the prompt echo does not need to be discarded by hand.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
    # Run generation in a background thread so the UI can consume the stream.
    thread = Thread(target=generate, args=(model, tokenizer, instruction, context, streamer))
    thread.start()
    st.write("Generating....")

    def iterator():
        # Strip the end-of-text marker from streamed chunks.
        for new_text in streamer:
            yield new_text.split("<|endoftext|>", 1)[0]

    st.write_stream(iterator)
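# To try the app locally: `streamlit run app.py` (requires streamlit,
# transformers and torch installed).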