import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
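# "." points at this Space's repository root, where the fine-tuned model files live.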
checkpoint = "."
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
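# Cache the model across Streamlit reruns so it is loaded only once.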
@st.cache_resource
def load_model(model_name):
    model = AutoModelForCausalLM.from_pretrained(model_name)
    return model
model = load_model(checkpoint)
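# On ZeroGPU Spaces, @spaces.GPU allocates a GPU for the duration of this call.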
@spaces.GPU
def infer(input_ids, bad_words_ids, max_tokens, temperature, top_k, top_p):
    # Beam-sample: 4 beams combined with temperature/top-k/top-p sampling,
    # blocking repeated bigrams and the banned words collected below.
    output_sequences = model.generate(
        input_ids=input_ids,
        bad_words_ids=bad_words_ids,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        do_sample=True,
        no_repeat_ngram_size=2,
        early_stopping=True,
        num_beams=4,
        pad_token_id=tokenizer.eos_token_id,
        num_return_sequences=1,
    )
    return output_sequences
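# Default prompt shown in the text area on first load.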
default_value = "We are building the first ever"
# UI: prompt input and sampling controls
st.title("Write with vcGPT 🦄")
st.write("This is an LLM that was fine-tuned on a dataset of investment memos to help you generate your next pitch.")
sent = st.text_area("Text", default_value)
max_tokens = st.sidebar.slider("Max Tokens", min_value=16, max_value=64)
temperature = st.sidebar.slider("Temperature", value=0.8, min_value=0.05, max_value=1.0, step=0.05)
top_k = st.sidebar.slider("Top-k", min_value=0, max_value=5, value=4)
top_p = st.sidebar.slider("Top-p", min_value=0.0, max_value=1.0, step=0.05, value=0.9)
encoded_prompt = tokenizer.encode(tokenizer.eos_token + sent, max_length=1024, return_tensors="pt", truncation=True)
# Get the token ids of words that should not be generated
bad_words_ids = tokenizer(["confidential", "angel.co", "angellist.com", "angellist"], add_special_tokens=False).input_ids
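# An empty encoded prompt is passed as None so generate() samples unconditionally.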
if encoded_prompt.size()[-1] == 0:
    input_ids = None
else:
    input_ids = encoded_prompt
output_sequences = infer(input_ids, bad_words_ids, max_tokens, temperature, top_k, top_p)
generated_sequences = []
for generated_sequence_idx, generated_sequence in enumerate(output_sequences):
    print(f"=== GENERATED SEQUENCE {generated_sequence_idx + 1} ===")
    generated_sequence = generated_sequence.tolist()
    # Decode text
    text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True, skip_special_tokens=True)
    # Prepend the original prompt and drop its decoded form from the model
    # output (skip_special_tokens already removed the prepended EOS token).
    total_sequence = (
        sent + text[len(tokenizer.decode(encoded_prompt[0], clean_up_tokenization_spaces=True, skip_special_tokens=True)):]
    )
    generated_sequences.append(total_sequence)
    print(total_sequence)
st.markdown(generated_sequences[-1])