Spaces:
Runtime error
Runtime error
import streamlit as st | |
import csv | |
# Page chrome: browser-tab title, visible heading, author credit, and the
# xkcd comic image followed by a caption linking to its source page.
_AUTHOR_LINKEDIN_URL = "https://www.linkedin.com/in/allen-roush-27721011b/"
_COMIC_IMAGE_URL = "https://imgs.xkcd.com/comics/reassuring.png"
_COMIC_PAGE_URL = "https://xkcd.com/1263/"

st.set_page_config(page_title="Reassuring Parables")
st.title("Reassuring Parables generator - by Allen Roush")
st.caption("Find me on Linkedin: " + _AUTHOR_LINKEDIN_URL)
st.image(_COMIC_IMAGE_URL)
st.caption("From " + _COMIC_PAGE_URL)
# instantiate | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# load (supports t5, mt5, byT5 models) | |
#model.from_pretrained("t5","t5-base") | |
# Prompt side of the training pairs: the same fixed prefix repeated 15 times.
source_text = ["Computers will never"] * 15
# Completion side of the training pairs: one full parable per entry.
# Every entry ends with a comma (including the last) so that adjacent string
# literals can never be silently concatenated into a single item.
target_text = [
    "Computers will never understand a sonnet",
    "Computers will never enjoy a salad",
    "Computers will never know how to love",
    "Computers will never know how to smell",
    "Computers will never have a sense of being",
    "Computers will never feel",
    "Computers will never appreciate art",
    "Computers will never have good manners",
    "Computers will never understand god",
    "Computers will never solve the halting problem",
    "Computers will never be conscious",
    "Computers will never prove that they aren't P-zombies",
    "Computers will never replace the human brain",
    "Computers will never write better reassuring parables than humans",
    "Computers will never replace humans",
]
#full_df = pd.DataFrame(list(zip(source_text, target_text)), columns = ["source_text", "target_text"]) | |
#print(full_df) | |
#train_df, eval_df = train_test_split(full_df, test_size = 0.2) | |
def train_model():
    """Run a training pass by calling ``model.train`` with fixed settings.

    Takes no arguments and returns nothing. ``model``, ``train_df`` and
    ``eval_df`` are read from module scope at call time.

    NOTE(review): in this file ``train_df`` / ``eval_df`` are only created in
    commented-out code and the call to this function is commented out too, so
    the function is effectively dormant. The keyword names look like a
    simpleT5-style ``train`` API -- confirm before re-enabling.
    """
    # Resolve the bound method first so name lookups happen in the same
    # order as a direct ``model.train(...)`` call.
    run_training = model.train
    training_options = dict(
        train_df=train_df,  # pandas dataframe with 2 columns: source_text & target_text
        eval_df=eval_df,  # pandas dataframe with 2 columns: source_text & target_text
        source_max_token_len=512,
        target_max_token_len=128,
        batch_size=1,
        max_epochs=4,
        use_gpu=True,
        outputdir="/home/lain/lain/CX_DB8/outputs",
        early_stopping_patience_epochs=0,
        precision=32,
    )
    run_training(**training_options)
#train_model() | |
# Load the trained T5 tokenizer and seq2seq model for one checkpoint id,
# showing a spinner in the page while both are being loaded.
_PARABLES_CHECKPOINT = "Hellisotherpeople/T5_Reassuring_Parables"
with st.spinner("Please wait while the model loads:"):
    tokenizer = AutoTokenizer.from_pretrained(_PARABLES_CHECKPOINT)
    model = AutoModelForSeq2SeqLM.from_pretrained(_PARABLES_CHECKPOINT)
# Sidebar form collecting every generation setting. Each numeric input is
# bounded so that values the generation call cannot accept (zero/negative
# counts or lengths, non-positive temperature) cannot be entered.
form = st.sidebar.form("choose_settings")
form.header("Main Settings")
# At least one sequence must be requested.
number_of_parables = form.number_input(
    "Select how many reassuring parables you want to generate",
    value=20,
    min_value=1,
    max_value=1000,
)
max_length_of_parable = form.number_input(
    "What's the max length of the parable?",
    value=20,
    min_value=1,
    max_value=128,
)
# The minimum length may not be negative nor exceed the chosen maximum.
min_length_of_parable = form.number_input(
    "What's the min length of the parable?",
    value=0,
    min_value=0,
    max_value=max_length_of_parable,
)
# K = 0 turns Top-K filtering off; negative K is meaningless.
top_k = form.number_input(
    "What value of K should we use for Top-K sampling? Set to zero to disable",
    value=50,
    min_value=0,
)
form.caption("In Top-K sampling, the K most likely next words are filtered and the probability mass is redistributed among only those K next words. ")
# Top-p filtering is only inactive at p = 1.0 (the full distribution is
# kept); p = 0 keeps just the single most likely token. The label used to
# say "Set to zero to disable", which was the opposite of what happens.
top_p = form.number_input(
    "What value of P should we use for Top-p sampling? Set to 1.0 to disable",
    value=0.95,
    min_value=0.0,
    max_value=1.0,
)
form.caption("Top-p sampling chooses from the smallest possible set of words whose cumulative probability exceeds the probability p. The probability mass is then redistributed among this set of words.")
# Temperature must be strictly positive, so the lower bound sits one default
# float step (0.01) above zero instead of at 0.0.
temperature = form.number_input(
    "How spicy/interesting do we want our models output to be",
    value=1.05,
    min_value=0.01,
)
form.caption("Setting this higher decreases the likelihood of high probability words and increases the likelihood of low probability (and presumably more interesting) words")
form.caption("For more details on what these settings mean, see here: https://huggingface.co/blog/how-to-generate")
form.form_submit_button("Generate some Reassuring Parables!")
#seed_value = st.sidebar.number_input("Select a seed value - change this to get different output", 42) ## Doesn't work :( | |
# Sample continuations of the fixed prompt using the sidebar settings, decode
# each generated sequence to text, and display the resulting list.
with st.spinner("Generating Reassuring Parables"):
    input_ids = tokenizer.encode("Computers will never", return_tensors='pt')
    # Sampling configuration, taken straight from the form values.
    sampling_options = dict(
        do_sample=True,
        max_length=max_length_of_parable,
        min_length=min_length_of_parable,
        top_k=top_k,
        top_p=top_p,
        num_return_sequences=number_of_parables,
        temperature=temperature,
    )
    sample_outputs = model.generate(input_ids, **sampling_options)
    #pl.seed_everything(seed_value)
    # One decoded string per returned sequence, special tokens stripped.
    list_of_parables = [
        tokenizer.decode(generated_ids, skip_special_tokens=True)
        for generated_ids in sample_outputs
    ]
    st.write(list_of_parables)