import streamlit as st
import torch
from threading import Thread

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    TextIteratorStreamer,
)

# Sampling hyperparameters.
temperature = 1.0
repetition_penalty = 1.0
use_gpu = True

# Currently unused; kept for future few-shot / batched generation.
perform_fewshot = False
few_shot_examples = 6
batch_size = 1


def generate_and_tokenize_instruction(sample, tokenizer, return_tensors=None):
    # Galician instruction template (roughly: "Below is an instruction that
    # describes a task, together with an input that provides more context.
    # Write a response that adequately answers the input.").
    prompt = f"""Abaixo encontrase unha instrución que describe unha tarefa, xunto cunha entrada que proporciona máis contexto. 
Escribe unha resposta que responda adecuadamente a entrada.

### Instrución:
{sample['instruction']}

### Entrada:
{sample['input']}

### Resposta:
{sample['output']}\n"""
    return tokenizer(prompt, return_tensors=return_tensors)


def generate(model, tokenizer, instruction, context, streamer):
    prompt = generate_and_tokenize_instruction(
        {"instruction": instruction, "input": context, "output": ""},
        tokenizer,
        return_tensors="pt",
    )
    input_ids = prompt["input_ids"].to(model.device)
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(do_sample=True),
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=max_length,  # module-level value set by the number input below
        temperature=temperature,
        top_k=10,
        repetition_penalty=repetition_penalty,
        eos_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # Decode the generated sequence and keep only the text after the
    # "### Resposta:" marker, up to the end-of-text token.
    output = tokenizer.decode(generation_output.sequences[0])
    output = output.split("### Resposta:", 1)[1].split("<|endoftext|>", 1)[0]
    return output


@st.cache_resource
def get_model():
    # Load the fine-tuned model once and cache it across Streamlit reruns.
    return AutoModelForCausalLM.from_pretrained("lagy/carballo-1.3b-finetuned")


@st.cache_resource
def get_tokenizer():
    # Load the matching tokenizer once and cache it across Streamlit reruns.
    return AutoTokenizer.from_pretrained("lagy/carballo-1.3b-finetuned")


st.write(" ")
instruction = st.text_area("Instrución")
context = st.text_area("Contexto")
max_length = st.number_input("Max generation length", value=120)

model = get_model()
tokenizer = get_tokenizer()
model.eval()
if use_gpu and torch.cuda.is_available():
    model.to("cuda:0")

if "reply" not in st.session_state:
    st.session_state["reply"] = ""

if st.button("Generate"):
    # skip_prompt=True keeps the echoed input prompt out of the stream, so the
    # UI only shows newly generated tokens.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
    thread = Thread(target=generate, args=(model, tokenizer, instruction, context, streamer))
    thread.start()
    st.write("Generating....")

    def iterator():
        # Yield chunks as they arrive and stop at the end-of-text token.
        for new_text in streamer:
            if "<|endoftext|>" in new_text:
                yield new_text.split("<|endoftext|>", 1)[0]
                break
            yield new_text

    st.write_stream(iterator)
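# Usage sketch (assumes this script is saved as app.py; the filename is
# hypothetical): launch it with `streamlit run app.py`. Streamlit reruns the
# whole script on every interaction, so the @st.cache_resource loaders above
# keep the model and tokenizer in memory instead of reloading them each run.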