zmbfeng commited on
Commit
1ac4018
·
verified ·
1 Parent(s): 3759c66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -81
app.py CHANGED
@@ -9,7 +9,7 @@ login(os.environ["HF_TOKEN"])
9
  #generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")
10
  tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
11
  original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
12
- #untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
13
  untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')
14
 
15
  def create_response_untethered_paraphrased(input_str,
@@ -58,48 +58,48 @@ def create_response_untethered_paraphrased(input_str,
58
 
59
 
60
 
61
- # def create_response_untethered(input_str,
62
- # num_beams,
63
- # num_return_sequences,
64
- # temperature,
65
- # repetition_penalty,
66
- # top_p,
67
- # top_k,
68
- # do_sample):
69
- # print("input_str="+input_str)
70
- # num_beams = int(num_beams)
71
- # print("num_beams=" + str(num_beams))
72
- # num_return_sequences=int(num_return_sequences)
73
- # print("num_return_sequences" + str(num_return_sequences))
74
- # print("top_p" + str(top_p))
75
- # top_k=int(top_k)
76
- # print("top_k" + str(top_k))
77
- # print("repetition_penalty" + str(repetition_penalty))
78
- # print("temperature" + str(temperature))
79
- # print("do_sample" + str(do_sample))
80
- # if not do_sample:
81
- # num_beams = 1
82
- # print("num_beams=" + str(num_beams))
83
 
84
- # #output_raw= generator(input_str)
85
- # """print (output_raw)"""
86
 
87
- # #output_str = output_raw[0]['generated_text']
88
- # #output_str = output_str.replace("\n", "")
89
- # #output_str = output_str.replace(input_str, "")
90
- # #output_str = tokenizer.decode(model.generate(**tokenizer("What are John West's hobbies?"+tokenizer.eos_token,return_tensors="pt",max_length=200))[0])
91
- # # output_str = tokenizer.decode(original_model.generate(**tokenizer(input_str+tokenizer.eos_token,return_tensors="pt",max_length=200),
92
- # # num_beams=num_beams,
93
- # # num_return_sequences=num_return_sequences)[0])
94
 
95
- # input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
96
- # #output_ids = fine_tuned_model.generate(input_ids,do_sample=True, max_length=100, temperature=0.2, top_p=0.9, repetition_penalty=1.5,num_return_sequences=6)
97
- # output_ids = untethered_model.generate(input_ids,do_sample=do_sample, max_length=100, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty,num_return_sequences=num_return_sequences, num_beams = num_beams)
98
- # outputs = ""
99
- # for output_id in output_ids:
100
- # output = tokenizer.decode(output_id, skip_special_tokens=True)
101
- # outputs=outputs+output+"\\n"
102
- # return outputs
103
 
104
  def create_response_original(input_str,
105
  num_beams,
@@ -197,45 +197,45 @@ interface1 = gr.Interface(fn=create_response_original,
197
  ], outputs=[gr.Textbox(label="output response", lines=30)])
198
 
199
 
200
- # interface2 = gr.Interface(fn=create_response_untethered,
201
- # title="untethered",
202
- # description="untethered fine tuning",
203
- # examples=[
204
- # ["What is death?",7,5,0.2,1.5,0.9,50,True], # The first example
205
- # ["One of the best teachers in all of life turns out to be what?",7,5,0.2,1.5,0.9,50,True], # The second example
206
- # ["what is your most meaningful relationship?",7,5,0.2,1.5,0.9,50,True], # The third example
207
- # ["What actually gives life meaning?",7,5,0.2,1.5,0.9,50,True]
208
- # ],
209
- # inputs=[
210
- # gr.Textbox(label="input text here", lines=3),
211
- # gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " +
212
- # "likely ones (specified in num_beams)", value=7),
213
- # gr.Number(label="num_return_sequences (integer) the number of outputs selected from num_beams possible output",
214
- # value=5),
215
- # gr.Number(
216
- # label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
217
- # " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
218
- # " makes the output more deterministic and focused",
219
- # value=0.2),
220
- # gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
221
- # "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
222
- # "in more varied and non-repetitive output.",
223
- # value=1.5),
224
- # gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
225
- # " to reach a certain threshold",
226
- # value=0.9),
227
- # gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
228
- # "This means that only the tokens with the highest probabilities are considered for sampling" +
229
- # "This reduces the diversity of the generated sequences, "+
230
- # "but also makes them more likely to be coherent and fluent.",
231
- # value=50),
232
- # gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
233
- # "which means that it will select the word with the highest probability at each step. " +
234
- # "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
235
- # "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
236
- # " select a word from the probability distribution at each step. This results in a more diverse and creative" +
237
- # " output, but it might also introduce errors and inconsistencies ", value=True)
238
- # ], outputs=[gr.Textbox(label="output response", lines=30)])
239
 
240
  interface3 = gr.Interface(fn=create_response_untethered_paraphrased,
241
  title="untethered paraphrased",
@@ -283,8 +283,8 @@ interface3 = gr.Interface(fn=create_response_untethered_paraphrased,
283
 
284
 
285
  #interface2 = gr.Interface(fn=create_response_fine_tuned, inputs="text", outputs="text", title="Fine Tuned")
286
- #demo = gr.TabbedInterface([interface1, interface2, interface3], ["Original", "Untethered", "Untethered paraphrased"])
287
- demo = gr.TabbedInterface([interface1, interface3], ["Original", "Untethered paraphrased"])
288
  # with gr.Blocks() as demo:
289
  # with gr.Row():
290
  #
 
9
# Commented-out alternative: a single text-generation pipeline instead of an
# explicit tokenizer + model pair.
#generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")
# Shared tokenizer for every model below (all are DialoGPT-medium based).
tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
# Baseline (not fine-tuned) DialoGPT-medium, used by create_response_original.
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
# "Untethered" checkpoint (repo name suggests 500-epoch fine-tune — TODO confirm);
# re-enabled by this commit for the second tab.
untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
# "Untethered paraphrased" checkpoint (repo name suggests 350-epoch fine-tune).
untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')
14
 
15
  def create_response_untethered_paraphrased(input_str,
 
58
 
59
 
60
 
61
def create_response_untethered(input_str,
                               num_beams,
                               num_return_sequences,
                               temperature,
                               repetition_penalty,
                               top_p,
                               top_k,
                               do_sample):
    """Generate chat responses to ``input_str`` with the untethered model.

    Parameters mirror ``GPT2LMHeadModel.generate``:
        input_str: user prompt; the model's EOS token is appended before encoding.
        num_beams: beam-search width (coerced to int; Gradio Numbers arrive as float).
        num_return_sequences: how many sequences to return (coerced to int).
        temperature: sampling temperature (higher = more random).
        repetition_penalty: >1.0 discourages repeating earlier tokens.
        top_p: nucleus-sampling probability mass.
        top_k: top-k sampling cutoff (coerced to int).
        do_sample: if False, generation is greedy and num_beams is forced to 1.

    Returns:
        All generated sequences decoded (special tokens stripped) and joined,
        one per line.
    """
    print("input_str=" + input_str)
    # Gradio Number widgets deliver floats; generate() requires ints for these.
    num_beams = int(num_beams)
    print("num_beams=" + str(num_beams))
    num_return_sequences = int(num_return_sequences)
    # Fixed: original prints were missing the "=" separator between label and value.
    print("num_return_sequences=" + str(num_return_sequences))
    print("top_p=" + str(top_p))
    top_k = int(top_k)
    print("top_k=" + str(top_k))
    print("repetition_penalty=" + str(repetition_penalty))
    print("temperature=" + str(temperature))
    print("do_sample=" + str(do_sample))
    if not do_sample:
        # Greedy decoding is deterministic, so multiple beams add nothing.
        # NOTE(review): num_return_sequences > 1 with greedy decoding would
        # still raise inside generate(); left as in the original — confirm.
        num_beams = 1
        print("num_beams=" + str(num_beams))

    # EOS token marks the end of the user turn, per the DialoGPT chat format.
    input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
    output_ids = untethered_model.generate(input_ids,
                                           do_sample=do_sample,
                                           max_length=100,
                                           temperature=temperature,
                                           top_p=top_p,
                                           top_k=top_k,
                                           repetition_penalty=repetition_penalty,
                                           num_return_sequences=num_return_sequences,
                                           num_beams=num_beams)
    outputs = ""
    for output_id in output_ids:
        output = tokenizer.decode(output_id, skip_special_tokens=True)
        # Fixed: original appended the two literal characters "\n" ("\\n")
        # instead of a real newline between sequences.
        outputs = outputs + output + "\n"
    return outputs
103
 
104
  def create_response_original(input_str,
105
  num_beams,
 
197
  ], outputs=[gr.Textbox(label="output response", lines=30)])
198
 
199
 
200
# Gradio tab for the untethered fine-tuned model. Input widget order must match
# the create_response_untethered signature. Label text fixed: the original
# string concatenations were missing spaces/punctuation at the seams
# ("resultsin", "consideredThis", "creativityIf") and read "If is set".
interface2 = gr.Interface(fn=create_response_untethered,
                          title="untethered",
                          description="untethered fine tuning",
                          examples=[
                              ["What is death?", 7, 5, 0.2, 1.5, 0.9, 50, True],  # The first example
                              ["One of the best teachers in all of life turns out to be what?", 7, 5, 0.2, 1.5, 0.9, 50, True],  # The second example
                              ["what is your most meaningful relationship?", 7, 5, 0.2, 1.5, 0.9, 50, True],  # The third example
                              ["What actually gives life meaning?", 7, 5, 0.2, 1.5, 0.9, 50, True]
                          ],
                          inputs=[
                              gr.Textbox(label="input text here", lines=3),
                              gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " +
                                              "likely ones (specified in num_beams)", value=7),
                              gr.Number(label="num_return_sequences (integer) the number of outputs selected from num_beams possible output",
                                        value=5),
                              gr.Number(
                                  label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                                        " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
                                        " makes the output more deterministic and focused",
                                  value=0.2),
                              gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
                                              "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results " +
                                              "in more varied and non-repetitive output.",
                                        value=1.5),
                              gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
                                              " to reach a certain threshold",
                                        value=0.9),
                              gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered. " +
                                              "This means that only the tokens with the highest probabilities are considered for sampling. " +
                                              "This reduces the diversity of the generated sequences, " +
                                              "but also makes them more likely to be coherent and fluent.",
                                        value=50),
                              gr.Checkbox(label="do_sample. If it is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
                                                "which means that it will select the word with the highest probability at each step. " +
                                                "This results in a deterministic and fluent output, but it might also lack diversity and creativity. " +
                                                "If it is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
                                                " select a word from the probability distribution at each step. This results in a more diverse and creative" +
                                                " output, but it might also introduce errors and inconsistencies ", value=True)
                          ], outputs=[gr.Textbox(label="output response", lines=30)])
239
 
240
  interface3 = gr.Interface(fn=create_response_untethered_paraphrased,
241
  title="untethered paraphrased",
 
283
 
284
 
285
#interface2 = gr.Interface(fn=create_response_fine_tuned, inputs="text", outputs="text", title="Fine Tuned")
# Three-tab UI: baseline, untethered, and untethered-paraphrased models.
# The commented two-tab line is the previous configuration, kept for rollback.
demo = gr.TabbedInterface([interface1, interface2, interface3], ["Original", "Untethered", "Untethered paraphrased"])
#demo = gr.TabbedInterface([interface1, interface3], ["Original", "Untethered paraphrased"])
288
  # with gr.Blocks() as demo:
289
  # with gr.Row():
290
  #