import gradio as gr
import os
import datetime

from huggingface_hub import login
from transformers import GPT2Tokenizer, GPT2LMHeadModel, set_seed
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

import nltk
# rake_nltk needs the stopwords and punkt resources.
nltk.download('stopwords')
nltk.download('punkt')
from rake_nltk import Rake  # imported for keyword extraction; not used below
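# Authenticate with the Hugging Face Hub; this assumes an HF_TOKEN secret is set in the environment.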
login(os.environ["HF_TOKEN"])
#https://huggingface.co/facebook/opt-1.3b
#generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")
# dt stores the current date and time
dt = datetime.datetime.now()
print(dt)
print("loading models")
tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
# T5 is an encoder-decoder model, so it loads with AutoModelForSeq2SeqLM (AutoModelWithLMHead is deprecated).
question_generation_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
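# All models and tokenizers are loaded once at startup, so each request only runs generate().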
# For local runs, pass cache_dir="..." to each from_pretrained call above to keep model weights in a custom location.
default_temperature=0.01
default_seed=43
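# Near-zero temperature makes sampling effectively deterministic; 43 is an arbitrary fixed default seed.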
def create_response_question_generation(input_str, max_length=64):
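    """Generate a question answerable by input_str.

    The mrm8488 T5 model expects prompts of the form
    "answer: <answer> context: <context>"; here the input serves as both.
    """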
input_text = "answer: %s context: %s </s>" % (input_str, input_str)
print(f"create question input_text={input_text}")
features = question_generation_tokenizer([input_text], return_tensors='pt')
output = question_generation_model.generate(input_ids=features['input_ids'],
attention_mask=features['attention_mask'],
max_length=max_length)
return question_generation_tokenizer.decode(output[0])
def create_response(input_str, temperature, seed, model_name):
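    """Generate a DialoGPT reply using the selected model, sampling temperature, and seed."""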
print("input_str="+input_str)
print("model_name="+str(model_name))
print("temperature" + str(temperature))
seed=int(seed)
print("seed" + str(seed))
input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
#= encoded["input_ids"]
#attention_mask = encoded["attention_mask"]
if seed != -1:
set_seed(seed)
if model_name == "original_model":
output = original_model.generate(input_ids, max_length=100, temperature=temperature, pad_token_id=tokenizer.eos_token_id)
#elif model_name == "untethered_model":
else:
output = original_model.generate(input_ids, max_length=100, temperature=temperature, pad_token_id=tokenizer.eos_token_id )
outputs = model_name+" generated <br>"
sentence = tokenizer.decode(output[0], skip_special_tokens=True)
outputs = outputs + sentence+ "<br/>"
return outputs
common_examples_string = "<br/>Sample Inputs:<br/>What is death?<br/>One of the best teachers in all of life turns out to be what?<br/>What is your most meaningful relationship?<br/>What actually gives life meaning?<br/>"
interface_original = gr.Interface(fn=create_response_question_generation,
                                  title="Question Generation",
                                  description="Enter a statement like 'Paris is the capital of France'",
                                  inputs=[
                                      gr.Textbox(label="input text here", lines=3),
                                      gr.Number(label="max length", value=64),
                                  ],
                                  outputs="html"
                                  )
interface_untethered_model = gr.Interface(fn=create_response,
                                          title="untethered model",
                                          description="Language model fine-tuned on 'The Untethered Soul', chapter 17" + common_examples_string,
                                          inputs=[
                                              gr.Textbox(label="input text here", lines=3),
                                              gr.Number(
                                                  label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                                                        " (e.g., 1.6) results in more diverse and creative output, while a lower temperature (e.g., 0.02)" +
                                                        " makes the output more deterministic and focused",
                                                  value=default_temperature),
                                              gr.Number(
                                                  label="seed (integer) random seed; set to -1 to use a random seed every time",
                                                  value=default_seed),
                                              gr.Textbox(label="model", lines=3, value="untethered_model", visible=False)
                                          ],
                                          outputs="html"
                                          )
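# Expose both apps as tabs: the question generator and the fine-tuned DialoGPT chat model.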
demo = gr.TabbedInterface([interface_original, interface_untethered_model], ["Original", "Untethered"])
demo.launch()