import datetime
import os

import gradio as gr
import nltk
from huggingface_hub import login
from rake_nltk import Rake
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    GPT2LMHeadModel,
    GPT2Tokenizer,
    set_seed,
)

# rake_nltk relies on the NLTK stopwords and punkt resources.
nltk.download('stopwords')
nltk.download('punkt')

login(os.environ["HF_TOKEN"])

# https://huggingface.co/facebook/opt-1.3b
# generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")

# Log the current date and time at startup.
print(datetime.datetime.now())
print("loading models")

# Local cache for model weights. Backslashes are doubled so Python does not
# treat them as escape sequences (a raw string would also work).
cache_dir = "G:\\My Drive\\Avatar\\language_models_windows"
# Alternative cache location on another machine:
# cache_dir = "C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows"

tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium', cache_dir=cache_dir)
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium', cache_dir=cache_dir)
untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500', cache_dir=cache_dir)

# AutoModelForSeq2SeqLM replaces the deprecated AutoModelWithLMHead for
# T5-style encoder-decoder models.
question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap", cache_dir=cache_dir)
question_generation_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap", cache_dir=cache_dir)
paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws", cache_dir=cache_dir)
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws", cache_dir=cache_dir)
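
# The question-generation and paraphrase models loaded above are not yet wired
# into the UI below. Minimal sketches of how they could be invoked, following
# the prompt formats shown on the respective model cards ("answer: ... context: ..."
# and "paraphrase: ..."); the helper names here are illustrative, not part of
# the original script.
def generate_question(answer, context):
    input_ids = question_generation_tokenizer.encode(
        f"answer: {answer}  context: {context}", return_tensors="pt")
    output = question_generation_model.generate(input_ids, max_length=64)
    return question_generation_tokenizer.decode(output[0], skip_special_tokens=True)


def generate_paraphrase(sentence):
    input_ids = paraphrase_tokenizer.encode(
        f"paraphrase: {sentence}", return_tensors="pt")
    output = paraphrase_model.generate(
        input_ids, max_length=128, do_sample=True, top_k=120, top_p=0.95)
    return paraphrase_tokenizer.decode(output[0], skip_special_tokens=True)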
default_temperature = 0.01
default_seed = 43


def create_response(input_str, temperature, seed, model_name):
    print("input_str=" + input_str)
    print("model_name=" + str(model_name))
    print("temperature=" + str(temperature))
    seed = int(seed)
    print("seed=" + str(seed))
    input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
    if seed != -1:
        set_seed(seed)
    # do_sample=True is needed for temperature to take effect; without it,
    # generate() decodes greedily and ignores the temperature value.
    if model_name == "original_model":
        output = original_model.generate(input_ids, max_length=100, do_sample=True,
                                         temperature=temperature,
                                         pad_token_id=tokenizer.eos_token_id)
    else:  # "untethered_model"
        output = untethered_model.generate(input_ids, max_length=100, do_sample=True,
                                           temperature=temperature,
                                           pad_token_id=tokenizer.eos_token_id)
    sentence = tokenizer.decode(output[0], skip_special_tokens=True)
    # The output component is "html", so use <br> for line breaks.
    return model_name + " generated<br>" + sentence + "<br>"


common_examples_string = """
Sample Inputs:
What is death?
One of the best teachers in all of life turns out to be what?
What is your most meaningful relationship?
What actually gives life meaning?
" interface_original = gr.Interface(fn=create_response, title="original", description="original language model, no fine tuning"+common_examples_string, #examples=examples, inputs=[ gr.Textbox(label="input text here", lines=3), # gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " + # "likely ones (specified in num_beams)", value=7), gr.Number( label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" + " (e.g., 1.6) results in more diverse and creative output, while a lower temperature (e.g., 0.02)" + " makes the output more deterministic and focused", value=default_temperature), gr.Number( label="seed (integer) random seed, set to -1 to use a random seed everytime", value=default_seed), gr.Textbox(label="model", lines=3, value="original_model",visible=False) ], outputs="html" ) interface_untethered_model = gr.Interface(fn=create_response, title="untethered model", description="language model fine tuned with'The Untethered Soul' chapter 17"+common_examples_string, #examples=examples, inputs=[ gr.Textbox(label="input text here", lines=3), gr.Number( label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" + " (e.g., 1.6) results in more diverse and creative output, while a lower temperature (e.g., 0.02)" + " makes the output more deterministic and focused", value=default_temperature), gr.Number( label="seed (integer) random seed, set to -1 to use a random seed everytime", value=default_seed), gr.Textbox(label="model", lines=3, value="untethered_model",visible=False) ], outputs="html" ) demo = gr.TabbedInterface([interface_original, interface_untethered_model], ["Original", "Untethered"]) demo.launch()