# testchatbot / app.py
import datetime
import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import GPT2Tokenizer, GPT2LMHeadModel, set_seed
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

import nltk
nltk.download('stopwords')
nltk.download('punkt')  # 'punctuation' is not a valid NLTK resource; punkt covers tokenization
from rake_nltk import Rake  # keyword extraction; imported but not yet used in this file

login(os.environ["HF_TOKEN"])  # HF_TOKEN must be available, e.g. as a Space secret
#https://huggingface.co/facebook/opt-1.3b
#generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")
# dt stores the current date and time
dt = datetime.datetime.now()
print(dt)
print("loading models")
tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
question_generation_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")  # AutoModelWithLMHead is deprecated; this is a T5 seq2seq model
paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
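# A minimal hardware check (a sketch, not part of the original app): the models above
# load on CPU by default, so logging CUDA availability helps diagnose slow generation.
print(f"CUDA available: {torch.cuda.is_available()}")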
# For local runs, every from_pretrained call above also accepts a cache_dir, e.g.:
# GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium', cache_dir="C:\\path\\to\\language_models_windows")
# (use escaped backslashes or raw strings for Windows paths)
default_temperature = 0.01
default_seed = 43  # a seed of -1 in create_response below means "do not fix the seed"
def create_response_question_generation(input_str, max_length=64):
    # The model expects "answer: ... context: ..."; with a single input box the
    # statement serves as both the answer and the context.
    input_text = "answer: %s context: %s </s>" % (input_str, input_str)
    print(f"create question input_text={input_text}")
    features = question_generation_tokenizer([input_text], return_tensors='pt')
    output = question_generation_model.generate(input_ids=features['input_ids'],
                                                attention_mask=features['attention_mask'],
                                                max_length=int(max_length))
    return question_generation_tokenizer.decode(output[0], skip_special_tokens=True)
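# Hedged smoke test (DEMO_SMOKE_TEST is a flag invented here, not part of this app;
# the exact question produced depends on the model version):
if os.environ.get("DEMO_SMOKE_TEST") == "1":
    print(create_response_question_generation("Paris is the capital of France"))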
def create_response_paraphrase(input_str, max_length, num_return_sequences):
    text = "paraphrase: " + input_str + " </s>"
    # pad_to_max_length/encode_plus are deprecated; padding=True is equivalent for a single input
    encoding = paraphrase_tokenizer(text, padding=True, return_tensors="pt")
    input_ids, attention_masks = encoding["input_ids"], encoding["attention_mask"]
    num_return_sequences = int(num_return_sequences)  # gr.Number returns floats; generate() needs ints
    max_length = int(max_length)
    outputs = paraphrase_model.generate(
        input_ids=input_ids, attention_mask=attention_masks,
        max_length=max_length,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        num_return_sequences=num_return_sequences,
        repetition_penalty=1.5
    )  # early_stopping dropped: it only applies to beam search, not sampling
    lines = [paraphrase_tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
             for output in outputs]
    return "<br/>".join(lines) + "<br/>"
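# Hedged smoke test (same invented DEMO_SMOKE_TEST flag; sampling makes output non-deterministic):
if os.environ.get("DEMO_SMOKE_TEST") == "1":
    print(create_response_paraphrase("Death is one of life's best teachers.", 64, 2))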
def create_response(input_str,
                    temperature,
                    seed,
                    model_name):
    print("input_str=" + input_str)
    print("model_name=" + str(model_name))
    print("temperature=" + str(temperature))
    seed = int(seed)
    print("seed=" + str(seed))
    input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
    if seed != -1:  # -1 means "do not fix the seed"
        set_seed(seed)
    if model_name == "original_model":
        model = original_model
    else:  # "untethered_model" (the original code used original_model in both branches)
        model = untethered_model
    # do_sample=True is required for temperature to take effect in generate()
    output = model.generate(input_ids, max_length=100, do_sample=True,
                            temperature=float(temperature), pad_token_id=tokenizer.eos_token_id)
    outputs = model_name + " generated <br/>"
    sentence = tokenizer.decode(output[0], skip_special_tokens=True)
    outputs = outputs + sentence + "<br/>"
    return outputs
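# create_response is not wired into a tab below; hedged smoke test for completeness
# (DEMO_SMOKE_TEST is an invented flag; the defaults mirror default_temperature/default_seed above):
if os.environ.get("DEMO_SMOKE_TEST") == "1":
    print(create_response("What is death?", default_temperature, default_seed, "untethered_model"))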
common_examples_string = "<br/>Sample Inputs:<br/>What is death?<br/>One of the best teachers in all of life turns out to be what?<br/>What is your most meaningful relationship?<br/>What actually gives life meaning?<br/>"  # prebuilt snippet for tab descriptions (not yet referenced)
interface_original = gr.Interface(fn=create_response_question_generation,
title="Question Generation",
description="Enter a statment like Paris is the captial of France",
inputs=[
gr.Textbox(label="input text here", lines=3),
gr.Number(
label="max length",
value=64),
],
outputs="html"
)
interface_untethered_model = gr.Interface(fn=create_response_paraphrase,
title="Paraphrase",
description="Paraphrase sentences",
inputs=[
gr.Textbox(label="input text here", lines=3, value="It is truly a great cosmic paradox that one of the best teachers in all of life turns out to be death. No person or situation could ever teach you as much as death has to teach you. "),
gr.Number(
label="max length",
value=512),
gr.Number(
label="num of responses",
value=2)
],
outputs="html"
)
demo = gr.TabbedInterface([interface_original, interface_untethered_model], ["Original", "Untethered"])
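# Hedged note: on Hugging Face Spaces, launch() needs no arguments. For local testing,
# demo.launch(server_name="0.0.0.0", server_port=7860) (standard Gradio kwargs) would
# expose the app on the local network instead.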
demo.launch()