# testchatbot / app.py
# zmbfeng's picture
# question generation
# 9b51486
# raw
# history blame
# 6.79 kB
# (Hugging Face Spaces page chrome above, commented out so the file parses.)
import gradio as gr
import random
import os
import copy
import torch
from huggingface_hub import login
from transformers import pipeline
from transformers import GPT2Tokenizer, GPT2LMHeadModel,set_seed
from transformers import AutoTokenizer, AutoModelWithLMHead,AutoModelForSeq2SeqLM
import datetime
import nltk
# NLTK data used by rake_nltk keyword extraction.
nltk.download('stopwords')
# NOTE(review): 'punctuation' is not a known NLTK data package id (the
# sentence tokenizer data is 'punkt', downloaded next); this call likely
# fails silently -- confirm and remove if unnecessary.
nltk.download('punctuation')
nltk.download('punkt')
from rake_nltk import Rake  # NOTE(review): Rake is imported but not used in this file
# Authenticate to the Hugging Face Hub (needed to pull the fine-tuned model).
login(os.environ["HF_TOKEN"])
#https://huggingface.co/facebook/opt-1.3b
#generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")
# dt stores the current date and time
dt = datetime.datetime.now()
print(dt)
print("loading models")
# Chat models: base DialoGPT-medium plus a fine-tune on "The Untethered Soul".
tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
# T5 model that maps "answer: X context: Y" input to a generated question.
# NOTE(review): AutoModelWithLMHead is deprecated in transformers;
# AutoModelForSeq2SeqLM (already imported) is the replacement for T5 models.
question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
question_generation_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
# Paraphrasing model -- loaded but not referenced by the interfaces below.
paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
# Commented-out variants pointing cache_dir at local Windows dev machines.
# tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium',cache_dir="G:\My Drive\Avatar\language_models_windows")
# original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium',cache_dir="G:\My Drive\Avatar\language_models_windows")
# untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500',cache_dir="G:\My Drive\Avatar\language_models_windows")
# question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
# question_generation_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
# paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
# paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
# tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium',cache_dir="C:\\Users\\zmbfeng\\Google Drive\\language_models_windows")
# original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium',cache_dir="C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows")
# untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500',cache_dir="C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows")
# question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap",cache_dir="C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows")
# question_generation_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap",cache_dir="C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows")
# paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws",cache_dir="C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows")
# paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws",cache_dir="C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows")
# UI defaults: near-greedy temperature, fixed seed for reproducible output.
default_temperature=0.01
default_seed=43
def create_response_question_generation(input_str, max_length=64):
    """Generate a question whose answer is ``input_str`` using the T5 QG model.

    The mrm8488/t5-base-finetuned-question-generation-ap model expects input
    of the form ``answer: <answer> context: <context>``; since the UI supplies
    only one string, it is used for both fields.

    Args:
        input_str: Statement to turn into a question.
        max_length: Maximum length in tokens of the generated question.
            May arrive as a float from ``gr.Number``; coerced to int.

    Returns:
        The decoded question text.
    """
    input_text = "answer: %s context: %s </s>" % (input_str, input_str)
    print(f"create question input_text={input_text}")
    features = question_generation_tokenizer([input_text], return_tensors='pt')
    # int(): gr.Number delivers a float, but generate() requires an int length.
    output = question_generation_model.generate(input_ids=features['input_ids'],
                                                attention_mask=features['attention_mask'],
                                                max_length=int(max_length))
    # skip_special_tokens=True strips the leading <pad> and trailing </s>
    # that a raw decode would otherwise show in the UI output.
    return question_generation_tokenizer.decode(output[0], skip_special_tokens=True)
def create_response(input_str,
                    temperature,
                    seed,
                    model_name):
    """Generate a chat reply with the selected DialoGPT model.

    Args:
        input_str: User prompt text.
        temperature: Sampling temperature; higher values give more random
            output, lower values more deterministic output.
        seed: Int-convertible random seed; -1 means "do not seed".
        model_name: "original_model" selects base DialoGPT-medium; any other
            value selects the fine-tuned untethered model.

    Returns:
        HTML string: a "<model_name> generated" header followed by the reply.
    """
    print("input_str="+input_str)
    print("model_name="+str(model_name))
    print("temperature" + str(temperature))
    seed=int(seed)
    print("seed" + str(seed))
    # Append EOS so DialoGPT treats the prompt as a complete dialogue turn.
    input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
    if seed != -1:
        set_seed(seed)
    # BUG FIX: both branches previously generated with original_model, so the
    # fine-tuned model was never actually used for the "untethered" tab.
    model = original_model if model_name == "original_model" else untethered_model
    # do_sample=True is required for `temperature` to take effect; greedy
    # decoding (the default) silently ignores the temperature argument.
    output = model.generate(input_ids, max_length=100, do_sample=True,
                            temperature=temperature, pad_token_id=tokenizer.eos_token_id)
    outputs = model_name+" generated <br>"
    sentence = tokenizer.decode(output[0], skip_special_tokens=True)
    outputs = outputs + sentence+ "<br/>"
    return outputs
# Sample prompts appended to the untethered-model tab description.
common_examples_string="<br/>Sample Inputs:<br/>What is death?<br/>One of the best teachers in all of life turns out to be what?<br/>what is your most meaningful relationship?<br/>What actually gives life meaning?<br/>"

# Tab 1: turn a declarative statement into a question.
interface_original = gr.Interface(
    fn=create_response_question_generation,
    title="Question Generation",
    # Typo fixes: "statment" -> "statement", "captial" -> "capital".
    description="Enter a statement like Paris is the capital of France",
    inputs=[
        gr.Textbox(label="input text here", lines=3),
        gr.Number(
            label="max length",
            value=64),
    ],
    outputs="html",
)

# Tab 2: chat with the fine-tuned DialoGPT model.
interface_untethered_model = gr.Interface(
    fn=create_response,
    # Typo fix: "fine tuned with'The ..." -> "fine-tuned with 'The ...".
    title="untethered model",
    description="language model fine-tuned with 'The Untethered Soul' chapter 17"+common_examples_string,
    #examples=examples,
    inputs=[
        gr.Textbox(label="input text here", lines=3),
        gr.Number(
            label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                  " (e.g., 1.6) results in more diverse and creative output, while a lower temperature (e.g., 0.02)" +
                  " makes the output more deterministic and focused",
            value=default_temperature),
        gr.Number(
            label="seed (integer) random seed, set to -1 to use a random seed everytime",
            value=default_seed),
        # Hidden field that routes this tab's requests to the fine-tuned model.
        gr.Textbox(label="model", lines=3, value="untethered_model", visible=False),
    ],
    outputs="html",
)

# Two-tab app; launch() blocks until the Gradio server exits.
demo = gr.TabbedInterface([interface_original, interface_untethered_model], ["Original", "Untethered"])
demo.launch()