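# Streamlit demo that serves lagy/carballo-1.3b-finetuned, a Galician
# instruction-following model, streaming generated text token by token.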
import streamlit as st
import torch
from threading import Thread

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    TextIteratorStreamer,
)
# Generation hyperparameters.
temperature = 1.0          # sampling temperature
repetition_penalty = 1.0   # values > 1.0 penalise repeated tokens
perform_fewshot = False    # few-shot prompting (not used in the code below)
few_shot_examples = 6      # number of few-shot examples (not used below)
use_gpu = True             # not used below; see the .to('cuda:0') hint in generate()
batch_size = 1             # not used below; generation is single-prompt
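# Alpaca-style instruction/input/response prompt template, in Galician
# (presumably the same format the model saw during finetuning).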
def generate_and_tokenize_instruction(sample, return_tensors=None):
    prompt = f"""Abaixo encontrase unha instrución que describe unha tarefa, xunto cunha entrada que proporciona máis contexto.
Escribe unha resposta que responda adecuadamente a entrada.
### Instrución:
{sample['instruction']}
### Entrada:
{sample['input']}
### Resposta:
{sample['output']}\n"""
    instruction = tokenizer(prompt, return_tensors=return_tensors)
    return instruction
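# Runs on a background thread: model.generate() pushes decoded text into the
# streamer, and the Streamlit main thread consumes it below.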
def generate(model, tokenizer, instruction, context, streamer):
    prompt = generate_and_tokenize_instruction(
        {'instruction': instruction, 'input': context, 'output': ''},
        return_tensors="pt",
    )
    input_ids = prompt["input_ids"]  # .to('cuda:0') to run on GPU
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(do_sample=True),
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=max_length,
        temperature=temperature,
        top_k=10,
        repetition_penalty=repetition_penalty,
        eos_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # Decode the single generated sequence, strip the prompt header, and keep
    # only the text that follows "### Resposta:".
    output = tokenizer.decode(generation_output.sequences[0])
    output = output.replace(
        "Abaixo encontrase unha instrución que describe unha tarefa, "
        "xunto cunha entrada que proporciona máis contexto.\n"
        "Escribe unha resposta que responda adecuadamente a entrada.\n",
        "",
    )
    output = output.split("###")[3].split("<|endoftext|>", 1)[0]
    return output
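# Loading a 1.3B-parameter checkpoint is expensive, so cache both loaders
# across Streamlit reruns (st.cache_resource is available in Streamlit >= 1.18).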
@st.cache_resource
def get_model():
    # Load the finetuned causal-LM checkpoint.
    return AutoModelForCausalLM.from_pretrained("lagy/carballo-1.3b-finetuned")

@st.cache_resource
def get_tokenizer():
    # Load the matching tokenizer.
    return AutoTokenizer.from_pretrained("lagy/carballo-1.3b-finetuned")
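# Streamlit page: input widgets, model loading, and the Generate button.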
st.write(" ")
instruction = st.text_area('Instrución')
context = st.text_area('Contexto')
max_length = st.number_input('Max generation length', value=120)

model = get_model()
tokenizer = get_tokenizer()
model.eval()

if 'reply' not in st.session_state:
    st.session_state['reply'] = ''  # (currently unused) last generated reply
if st.button('Generate'):
    # skip_prompt=True makes the streamer yield only newly generated text,
    # so the echoed prompt never reaches the UI.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
    thread = Thread(target=generate, args=(model, tokenizer, instruction, context, streamer))
    thread.start()
    st.write("Generating....")

    def iterator():
        # Drop anything after the end-of-text marker in each streamed chunk.
        for new_text in streamer:
            yield new_text.split("<|endoftext|>", 1)[0]

    st.write_stream(iterator)
    thread.join()
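# To try the app locally (assuming this file is saved as app.py and that
# streamlit, torch, and transformers are installed):
#
#     streamlit run app.py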