# testchatbot / app.py
# Page residue from the hosting site (not part of the program):
# zmbfeng's picture | fix spacing | 8649c10 | raw | history blame | 13 kB
import gradio as gr
import random
import os
import copy
from huggingface_hub import login
from transformers import pipeline
from transformers import GPT2Tokenizer, GPT2LMHeadModel
# Authenticate against the Hugging Face Hub before any checkpoint is fetched.
# The token is read from the HF_TOKEN environment variable; a missing variable
# raises KeyError at import time.
login(os.environ["HF_TOKEN"])

#https://huggingface.co/facebook/opt-1.3b
#generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")

# Checkpoint identifiers. The tokenizer and the baseline model come from the
# same base checkpoint; the two "untethered" checkpoints are loaded with the
# same model class and are used together with that one tokenizer.
_BASE_CHECKPOINT = 'microsoft/DialoGPT-medium'
_UNTETHERED_CHECKPOINT = 'zmbfeng/untethered_20240225_epochs_500'
_UNTETHERED_PARAPHRASED_CHECKPOINT = 'zmbfeng/untethered_20240227_epochs_350'

tokenizer = GPT2Tokenizer.from_pretrained(_BASE_CHECKPOINT)
original_model = GPT2LMHeadModel.from_pretrained(_BASE_CHECKPOINT)
untethered_model = GPT2LMHeadModel.from_pretrained(_UNTETHERED_CHECKPOINT)
untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained(
    _UNTETHERED_PARAPHRASED_CHECKPOINT
)
def _select_model(model_name):
    """Return the loaded model registered under *model_name*.

    Unrecognised names fall back to ``original_model``.
    """
    models = {
        "original_model": original_model,
        "untethered_model": untethered_model,
        "untethered_paraphrased_model": untethered_paraphrased_model,
    }
    return models.get(model_name, original_model)


def create_response(input_str,
                    num_return_sequences,
                    temperature,
                    repetition_penalty,
                    top_p,
                    do_sample,
                    model_name):
    """Generate replies to *input_str* with the selected model.

    Args:
        input_str: Prompt text; the tokenizer's EOS token is appended before
            encoding.
        num_return_sequences: Number of replies to generate. Converted with
            ``int()``. Forced to 1 when ``do_sample`` is false.
        temperature: Forwarded to ``generate``.
        repetition_penalty: Forwarded to ``generate``.
        top_p: Forwarded to ``generate``.
        do_sample: Forwarded to ``generate``; false selects non-sampling
            decoding.
        model_name: One of ``"original_model"``, ``"untethered_model"`` or
            ``"untethered_paraphrased_model"``. Any other value uses the
            original model.

    Returns:
        A plain-text string: a ``"<model_name> generated"`` header line
        followed by one decoded sequence per line. Newlines are used as
        separators because the result is displayed in a ``gr.Textbox``,
        which shows HTML tags literally.
    """
    num_return_sequences = int(num_return_sequences)

    print(f"input_str={input_str}")
    print(f"model_name={model_name}")
    print(f"num_return_sequences={num_return_sequences}")
    print(f"top_p={top_p}")
    print(f"repetition_penalty={repetition_penalty}")
    print(f"temperature={temperature}")
    print(f"do_sample={do_sample}")

    # Without sampling (and without beam search) generate() rejects a request
    # for more than one returned sequence, so clamp instead of failing.
    if not do_sample and num_return_sequences != 1:
        print("do_sample is False; forcing num_return_sequences=1")
        num_return_sequences = 1

    encoded = tokenizer.encode_plus(input_str + tokenizer.eos_token, return_tensors="pt")

    output_ids = _select_model(model_name).generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        # The EOS id is supplied as the padding id.
        pad_token_id=tokenizer.eos_token_id,
        do_sample=do_sample,
        max_length=100,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences,
    )

    lines = [f"{model_name} generated"]
    lines.extend(
        tokenizer.decode(sequence, skip_special_tokens=True)
        for sequence in output_ids
    )
    return "\n".join(lines) + "\n"
# Prompts offered as clickable examples in every tab.
_EXAMPLE_PROMPTS = (
    "What is death?",
    "One of the best teachers in all of life turns out to be what?",
    "what is your most meaningful relationship?",
    "What actually gives life meaning?",
)

# Settings shared by every example row, in the order the interface inputs
# expect them: num_return_sequences, temperature, repetition_penalty, top_p,
# do_sample.
_EXAMPLE_SETTINGS = (5, 0.2, 1.5, 0.9, True)

# One independent list per prompt: [prompt, *settings].
common_examples = [[prompt, *_EXAMPLE_SETTINGS] for prompt in _EXAMPLE_PROMPTS]
def _build_interface(title, description, model_name):
    """Create the Gradio interface for one model tab.

    All tabs share the same input widgets and output box; they differ only in
    their title, description and the model name handed to ``create_response``.

    Args:
        title: Heading of the interface.
        description: Short text describing the model behind the interface.
        model_name: Value placed in a hidden textbox so it reaches
            ``create_response`` as its last argument; it is also appended to
            every example row.

    Returns:
        A ``gr.Interface`` that calls ``create_response``.
    """
    # Example rows need one value per input component, so the model name is
    # appended to a copy of each shared row; ``common_examples`` is not
    # modified.
    examples = [[*row, model_name] for row in common_examples]

    # The widgets are constructed on every call so that each interface gets
    # its own component instances. Their order matches the positional
    # parameters of ``create_response``.
    return gr.Interface(
        fn=create_response,
        title=title,
        description=description,
        examples=examples,
        inputs=[
            gr.Textbox(label="input text here", lines=3),
            gr.Number(
                label="num_return_sequences (integer) the number of outputs selected from num_beams possible output",
                value=5),
            gr.Number(
                label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                      " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
                      " makes the output more deterministic and focused",
                value=0.2),
            gr.Number(
                label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
                      "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
                      " in more varied and non-repetitive output.",
                value=1.5),
            gr.Number(
                label="top_p (decimal) the model will only consider the words that have a high enough probability" +
                      " to reach a certain threshold",
                value=0.9),
            gr.Checkbox(
                label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
                      "which means that it will select the word with the highest probability at each step. " +
                      "This results in a deterministic and fluent output, but it might also lack diversity and creativity. " +
                      "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
                      " select a word from the probability distribution at each step. This results in a more diverse and creative" +
                      " output, but it might also introduce errors and inconsistencies ",
                value=True),
            # Hidden field: carries the model name through to create_response.
            gr.Textbox(label="model", lines=3, value=model_name, visible=False),
        ],
        outputs=[gr.Textbox(label="output response", lines=30)],
    )


interface_original = _build_interface(
    title="original",
    description="original language model, no fine tuning",
    model_name="original_model",
)

interface_untethered_model = _build_interface(
    title="untethered model",
    description="language model fine tuned with'The Untethered Soul' chapter 17",
    model_name="untethered_model",
)

interface_untethered_paraphrased_model = _build_interface(
    title="untethered paraphrased_model",
    description="language model fine tuned with'The Untethered Soul' chapter 17 paraphrased",
    model_name="untethered_paraphrased_model",
)
# Pair every tab title with the interface shown under it, then split the
# pairs into the two parallel lists that TabbedInterface takes.
_TABS = (
    ("Original", interface_original),
    ("Untethered", interface_untethered_model),
    ("Untethered paraphrased", interface_untethered_paraphrased_model),
)
demo = gr.TabbedInterface(
    [tab_interface for _, tab_interface in _TABS],
    [tab_title for tab_title, _ in _TABS],
)

# Start serving the app.
demo.launch()