zmbfeng commited on
Commit
9b51486
·
1 Parent(s): e90d0d7

question generation

Browse files
Files changed (1) hide show
  1. app.py +32 -29
app.py CHANGED
@@ -24,21 +24,21 @@ login(os.environ["HF_TOKEN"])
24
  dt = datetime.datetime.now()
25
  print(dt)
26
  print("loading models")
27
- # tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
28
- # original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
29
- # untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
30
- # question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
31
- # question_generation_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
32
- # paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
33
- # paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
34
-
35
- tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium',cache_dir="G:\My Drive\Avatar\language_models_windows")
36
- original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium',cache_dir="G:\My Drive\Avatar\language_models_windows")
37
- untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500',cache_dir="G:\My Drive\Avatar\language_models_windows")
38
- question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
39
- question_generation_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
40
- paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
41
- paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
42
 
43
  # tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium',cache_dir="C:\\Users\\zmbfeng\\Google Drive\\language_models_windows")
44
  # original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium',cache_dir="C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows")
@@ -49,6 +49,18 @@ paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Pa
49
  # paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws",cache_dir="C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows")
50
  default_temperature=0.01
51
  default_seed=43
 
 
 
 
 
 
 
 
 
 
 
 
52
  def create_response(input_str,
53
  temperature,
54
  seed,
@@ -82,23 +94,14 @@ def create_response(input_str,
82
 
83
  common_examples_string="<br/>Sample Inputs:<br/>What is death?<br/>One of the best teachers in all of life turns out to be what?<br/>what is your most meaningful relationship?<br/>What actually gives life meaning?<br/>"
84
 
85
- interface_original = gr.Interface(fn=create_response,
86
- title="original",
87
- description="original language model, no fine tuning"+common_examples_string,
88
- #examples=examples,
89
  inputs=[
90
  gr.Textbox(label="input text here", lines=3),
91
- # gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " +
92
- # "likely ones (specified in num_beams)", value=7),
93
- gr.Number(
94
- label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
95
- " (e.g., 1.6) results in more diverse and creative output, while a lower temperature (e.g., 0.02)" +
96
- " makes the output more deterministic and focused",
97
- value=default_temperature),
98
  gr.Number(
99
- label="seed (integer) random seed, set to -1 to use a random seed everytime",
100
- value=default_seed),
101
- gr.Textbox(label="model", lines=3, value="original_model",visible=False)
102
  ],
103
  outputs="html"
104
  )
 
24
  dt = datetime.datetime.now()
25
  print(dt)
26
  print("loading models")
27
+ tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
28
+ original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
29
+ untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
30
+ question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
31
+ question_generation_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap")
32
+ paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
33
+ paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
34
+
35
+ # tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium',cache_dir="G:\My Drive\Avatar\language_models_windows")
36
+ # original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium',cache_dir="G:\My Drive\Avatar\language_models_windows")
37
+ # untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500',cache_dir="G:\My Drive\Avatar\language_models_windows")
38
+ # question_generation_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
39
+ # question_generation_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-question-generation-ap",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
40
+ # paraphrase_tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
41
+ # paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws",cache_dir="G:\\My Drive\\Avatar\\language_models_windows")
42
 
43
  # tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium',cache_dir="C:\\Users\\zmbfeng\\Google Drive\\language_models_windows")
44
  # original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium',cache_dir="C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows")
 
49
  # paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws",cache_dir="C:\\Users\\zmbfeng\\Google Drive\\Avatar\\language_models_windows")
50
  default_temperature=0.01
51
  default_seed=43
52
def create_response_question_generation(input_str, max_length=64):
    """Generate a question from an input statement using the fine-tuned T5 model.

    Args:
        input_str: Statement to build a question from. NOTE(review): it is fed
            to the model as BOTH the "answer" and the "context" — the single-input
            UI provides nothing better; revisit if a separate context field is added.
        max_length: Maximum length, in tokens, of the generated question.

    Returns:
        The generated question as a plain string.
    """
    # The mrm8488/t5-base-finetuned-question-generation-ap model expects input
    # in the form "answer: ... context: ...".
    input_text = "answer: %s context: %s </s>" % (input_str, input_str)
    print(f"create question input_text={input_text}")
    features = question_generation_tokenizer([input_text], return_tensors='pt')

    output = question_generation_model.generate(input_ids=features['input_ids'],
                                                attention_mask=features['attention_mask'],
                                                max_length=max_length)

    # skip_special_tokens=True: without it the decoded text leaks "<pad>" and
    # "</s>" markers into the user-facing output shown by the Gradio interface.
    return question_generation_tokenizer.decode(output[0], skip_special_tokens=True)
62
+
63
+
64
  def create_response(input_str,
65
  temperature,
66
  seed,
 
94
 
95
  common_examples_string="<br/>Sample Inputs:<br/>What is death?<br/>One of the best teachers in all of life turns out to be what?<br/>what is your most meaningful relationship?<br/>What actually gives life meaning?<br/>"
96
 
97
+ interface_original = gr.Interface(fn=create_response_question_generation,
98
+ title="Question Generation",
99
+ description="Enter a statement like Paris is the capital of France",
 
100
  inputs=[
101
  gr.Textbox(label="input text here", lines=3),
 
 
 
 
 
 
 
102
  gr.Number(
103
+ label="max length",
104
+ value=64),
 
105
  ],
106
  outputs="html"
107
  )