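# Streamlit demo for the "lagy/carballo-1.3b-finetuned" instruction-following
# model. The user enters an instruction and some context, and the app streams
# the model's answer back as it is generated.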
import streamlit as st
import torch
from threading import Thread

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
    TextIteratorStreamer,
)

# Generation settings. Only temperature and repetition_penalty are used below;
# the remaining variables are currently unused.
temperature = 1.0
repetition_penalty = 1.0
perform_fewshot = False
few_shot_examples = 6
use_gpu = True
batch_size = 1

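# Build the instrución/entrada/resposta prompt expected by the fine-tuned
# model and tokenize it; at inference time `output` is passed as an empty string.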
def generate_and_tokenize_instruction(sample,return_tensors=None):
    prompt = f"""Abaixo encontrase unha instrución que describe unha tarefa, xunto cunha entrada que proporciona máis contexto. 
Escribe unha resposta que responda adecuadamente a entrada.
### Instrución:
{sample['instruction']}
### Entrada:
{sample['input']}
### Resposta:
{sample['output']}\n"""
    instruction = tokenizer(prompt,return_tensors=return_tensors)
    return instruction

# Run generation for one (instruction, context) pair, streaming new tokens
# through `streamer` so the Streamlit UI can render them as they arrive.
def generate(model, tokenizer, instruction, context, streamer):
    prompt = generate_and_tokenize_instruction({'instruction': instruction, 'input': context, 'output': ''}, return_tensors="pt")
    input_ids = prompt["input_ids"]  # append .to('cuda:0') here to run on GPU
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(do_sample=True),
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=max_length,
        temperature=temperature,
        top_k=10,
        repetition_penalty=repetition_penalty,
        eos_token_id=tokenizer.eos_token_id,
        streamer=streamer
    )
    # Decode the first generated sequence, keep only the fourth "###"-separated
    # segment (the Resposta section) and drop anything after the end-of-text token.
    output = tokenizer.decode(generation_output.sequences[0])
    output = output.split("###")[3].split("<|endoftext|>", 1)[0]
    return output

@st.cache_resource
def get_model():
    # Load the fine-tuned model once and reuse it across Streamlit reruns.
    return AutoModelForCausalLM.from_pretrained("lagy/carballo-1.3b-finetuned")

@st.cache_resource
def get_tokenizer():
    # Load the matching tokenizer once and reuse it across Streamlit reruns.
    return AutoTokenizer.from_pretrained("lagy/carballo-1.3b-finetuned")

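# UI: instruction and context text areas plus the maximum number of new tokens.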
st.write(" ")

instruction = st.text_area('Instrución')
context = st.text_area('Contexto')
max_length = st.number_input('Max generation length',value=120)


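# Load the cached model and tokenizer, then switch the model to eval mode.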
model = get_model()
tokenizer = get_tokenizer()
model.eval()

if 'reply' not in st.session_state:
    st.session_state['reply'] = ''

if st.button('Generate'):
    # Run generation in a background thread; the streamer yields decoded text
    # chunks that can be rendered as they are produced.
    streamer = TextIteratorStreamer(tokenizer)
    thread = Thread(target=generate, args=(model, tokenizer, instruction, context, streamer))
    thread.start()
    st.write("Generating....")

    # Discard the first chunk from the streamer, which echoes the prompt text.
    next(streamer)

    def iterator():
        # Strip the end-of-text token from each streamed chunk before display.
        for new_text in streamer:
            yield new_text.split("<|endoftext|>", 1)[0]

    st.write_stream(iterator)
    