import gradio as gr
import os
from huggingface_hub import login
from transformers import GPT2Tokenizer, GPT2LMHeadModel, set_seed
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import datetime
import nltk

# rake_nltk needs the NLTK stopword list and the punkt tokenizer
nltk.download('stopwords')
nltk.download('punkt')
from rake_nltk import Rake
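
# Rake (keyword extraction) is imported but not used anywhere below yet. A
# minimal sketch of the rake_nltk API, in case it is wired in later:
#   r = Rake()
#   r.extract_keywords_from_text("What actually gives life meaning?")
#   keywords = r.get_ranked_phrases()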

# Authenticate with the Hugging Face Hub (HF_TOKEN must be set in the environment)
login(os.environ["HF_TOKEN"])



# Log the startup timestamp so model-loading time is visible in the logs
print(datetime.datetime.now())
print("loading models")
# Shared tokenizer for the base DialoGPT model and its fine-tuned variant
tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
# T5 model fine-tuned for answer-aware question generation
question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
question_generation_model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
# T5 model fine-tuned on PAWS for paraphrasing
paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")

# A cache_dir argument can be passed to each from_pretrained() call above to
# store the downloaded model files at a custom local path.

default_temperature = 0.01
default_seed = 43
def create_response_question_generation(input_str, max_length=64):
    # The T5 question-generation model expects "answer: ...  context: ..." input;
    # here the user's statement serves as both the answer and the context.
    input_text = "answer: %s  context: %s </s>" % (input_str, input_str)
    print(f"create question input_text={input_text}")
    features = question_generation_tokenizer([input_text], return_tensors='pt')

    output = question_generation_model.generate(input_ids=features['input_ids'],
                                                attention_mask=features['attention_mask'],
                                                max_length=max_length)

    # skip_special_tokens keeps <pad>/</s> markers out of the HTML output
    return question_generation_tokenizer.decode(output[0], skip_special_tokens=True)

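# Illustrative call (the exact wording of the generated question will vary):
#   create_response_question_generation("Paris is the capital of France")
#   might return something like "question: What is the capital of France?"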

def create_response(input_str,
                    temperature,
                    seed,
                    model_name):
    print("input_str=" + input_str)
    print("model_name=" + str(model_name))
    print("temperature=" + str(temperature))
    seed = int(seed)
    print("seed=" + str(seed))
    # DialoGPT expects the prompt to be terminated with the end-of-sequence token
    input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")

    if seed != -1:
        set_seed(seed)
    # do_sample=True is required for temperature (and the seed) to have any effect
    if model_name == "original_model":
        output = original_model.generate(input_ids, max_length=100, do_sample=True,
                                         temperature=temperature, pad_token_id=tokenizer.eos_token_id)
    else:
        output = untethered_model.generate(input_ids, max_length=100, do_sample=True,
                                           temperature=temperature, pad_token_id=tokenizer.eos_token_id)

    outputs = model_name + " generated <br/>"
    sentence = tokenizer.decode(output[0], skip_special_tokens=True)
    outputs = outputs + sentence + "<br/>"

    return outputs
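
# Illustrative call (output text varies with temperature and seed):
#   create_response("What is death?", temperature=0.01, seed=43,
#                   model_name="untethered_model")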


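# The paraphrase model is loaded above but not wired into a tab yet. A minimal
# sketch of how it could be invoked (prompt format follows the
# Vamsi/T5_Paraphrase_Paws model card; this function name is hypothetical):
def create_response_paraphrase(input_str, max_length=128):
    input_text = "paraphrase: " + input_str + " </s>"
    features = paraphrase_tokenizer([input_text], return_tensors='pt')
    output = paraphrase_model.generate(input_ids=features['input_ids'],
                                       attention_mask=features['attention_mask'],
                                       max_length=max_length,
                                       do_sample=True,
                                       top_k=120,
                                       top_p=0.95)
    return paraphrase_tokenizer.decode(output[0], skip_special_tokens=True)
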
common_examples_string = "<br/>Sample Inputs:<br/>What is death?<br/>One of the best teachers in all of life turns out to be what?<br/>What is your most meaningful relationship?<br/>What actually gives life meaning?<br/>"

interface_original = gr.Interface(fn=create_response_question_generation,
    title="Question Generation",
    description="Enter a statement like: Paris is the capital of France",
    inputs=[
        gr.Textbox(label="input text here", lines=3),
        gr.Number(
            label="max length",
            value=64),
    ],
    outputs="html"
    )



interface_untethered_model = gr.Interface(fn=create_response,
    title="untethered model",
    description="language model fine-tuned with 'The Untethered Soul' chapter 17" + common_examples_string,
    inputs=[
        gr.Textbox(label="input text here", lines=3),
        gr.Number(
            label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                  " (e.g., 1.6) results in more diverse and creative output, while a lower temperature (e.g., 0.02)" +
                  " makes the output more deterministic and focused",
            value=default_temperature),
        gr.Number(
            label="seed (integer) random seed; set to -1 to use a new random seed every time",
            value=default_seed),
        gr.Textbox(label="model", lines=3, value="untethered_model", visible=False)
    ],
    outputs="html"
    )



demo = gr.TabbedInterface([interface_original, interface_untethered_model], ["Original", "Untethered"])

demo.launch()
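
# Note: outside Hugging Face Spaces, demo.launch(share=True) would also create
# a temporary public URL (a standard Gradio option).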