import streamlit as st

from transformers import AutoModelForCausalLM, AutoTokenizer

# Model and tokenizer files live in the app's root directory
checkpoint = "."

tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Cache the loaded model so script reruns reuse it instead of reloading the
# weights; allow_output_mutation keeps Streamlit from hashing the model object
# on every rerun
@st.cache(allow_output_mutation=True)
def load_model(model_name):
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return model

model = load_model(checkpoint)

def infer(input_ids, bad_words_ids, max_tokens, temperature, top_k, top_p):
    # Beam-sampled generation: 4 beams with sampling, blocking repeated
    # bigrams and any of the banned-word token sequences
    output_sequences = model.generate(
        input_ids=input_ids,
        bad_words_ids=bad_words_ids,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        do_sample=True,
        no_repeat_ngram_size=2,
        early_stopping=True,
        num_beams=4,
        pad_token_id=tokenizer.eos_token_id,
        num_return_sequences=1
    )

    return output_sequences

default_value = "We are building the first ever"
    
#prompts
st.title("Write with vcGPT 🦄")
st.write("This is a LLM that was fine-tuned on a dataset of investment memos to help you generate your next pitch.")

sent = st.text_area("Text", default_value)

max_tokens = st.sidebar.slider("Max Tokens", min_value = 16, max_value=64)
temperature = st.sidebar.slider("Temperature", value = 0.8, min_value = 0.05, max_value=1.0, step=0.05)
top_k = st.sidebar.slider("Top-k", min_value = 0, max_value=5, value = 4)
top_p = st.sidebar.slider("Top-p", min_value = 0.0, max_value=1.0, step = 0.05, value = 0.9)
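
# How the sliders shape decoding (general sampling behaviour, not app-specific):
# - Max Tokens caps how many new tokens are generated beyond the prompt.
# - Temperature rescales the next-token distribution; lower values are more
#   deterministic, higher values more varied.
# - Top-k restricts sampling to the k most likely tokens (0 disables the filter).
# - Top-p (nucleus sampling) keeps the smallest set of tokens whose cumulative
#   probability exceeds p.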

# print(model.config.max_position_embeddings)

# Prepend the EOS token as a start-of-text marker and truncate the prompt to
# the model's 1024-token context window
encoded_prompt = tokenizer.encode(tokenizer.eos_token + sent, max_length=1024, return_tensors="pt", truncation=True)

# Token ids of words that should not be generated
bad_words_ids = tokenizer(["confidential", "angel.co", "angellist.com", "angellist"], add_special_tokens=False).input_ids

# Guard against an empty prompt
if encoded_prompt.size()[-1] == 0:
    input_ids = None
else:
    input_ids = encoded_prompt

output_sequences = infer(input_ids, bad_words_ids, max_tokens, temperature, top_k, top_p)
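# output_sequences is a 2-D tensor of token ids, one row per returned sequence
# (a single row here, since num_return_sequences=1)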

generated_sequences = []

for generated_sequence_idx, generated_sequence in enumerate(output_sequences):
    print(f"=== GENERATED SEQUENCE {generated_sequence_idx + 1} ===")
    generated_sequence = generated_sequence.tolist()

    # Decode the generated token ids back into text
    text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True, skip_special_tokens=True)

    # Remove all text after the stop token
    # text = text[: text.find(args.stop_token) if args.stop_token else None]

    # Re-attach the original prompt and drop the decoded prompt prefix that was
    # only needed for pre-processing
    total_sequence = (
        sent + text[len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True, skip_special_tokens=True)) :]
    )

    generated_sequences.append(total_sequence)
    print(total_sequence)

# Show the final generated text in the app
st.markdown(generated_sequences[-1])