zmbfeng committed on
Commit
c90c5fd
·
verified ·
1 Parent(s): aa9d7ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -80
app.py CHANGED
@@ -9,7 +9,7 @@ login(os.environ["HF_TOKEN"])
9
  #generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")
10
  tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
11
  original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
12
- untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
13
  untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')
14
 
15
  def create_response_untethered_paraphrased(input_str,
@@ -57,48 +57,48 @@ def create_response_untethered_paraphrased(input_str,
57
 
58
 
59
 
60
- def create_response_untethered(input_str,
61
- num_beams,
62
- num_return_sequences,
63
- temperature,
64
- repetition_penalty,
65
- top_p,
66
- top_k,
67
- do_sample):
68
- print("input_str="+input_str)
69
- num_beams = int(num_beams)
70
- print("num_beams=" + str(num_beams))
71
- num_return_sequences=int(num_return_sequences)
72
- print("num_return_sequences" + str(num_return_sequences))
73
- print("top_p" + str(top_p))
74
- top_k=int(top_k)
75
- print("top_k" + str(top_k))
76
- print("repetition_penalty" + str(repetition_penalty))
77
- print("temperature" + str(temperature))
78
- print("do_sample" + str(do_sample))
79
- if not do_sample:
80
- num_beams = 1
81
- print("num_beams=" + str(num_beams))
82
 
83
- #output_raw= generator(input_str)
84
- """print (output_raw)"""
85
 
86
- #output_str = output_raw[0]['generated_text']
87
- #output_str = output_str.replace("\n", "")
88
- #output_str = output_str.replace(input_str, "")
89
- #output_str = tokenizer.decode(model.generate(**tokenizer("What are John West's hobbies?"+tokenizer.eos_token,return_tensors="pt",max_length=200))[0])
90
- # output_str = tokenizer.decode(original_model.generate(**tokenizer(input_str+tokenizer.eos_token,return_tensors="pt",max_length=200),
91
- # num_beams=num_beams,
92
- # num_return_sequences=num_return_sequences)[0])
93
 
94
- input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
95
- #output_ids = fine_tuned_model.generate(input_ids,do_sample=True, max_length=100, temperature=0.2, top_p=0.9, repetition_penalty=1.5,num_return_sequences=6)
96
- output_ids = untethered_model.generate(input_ids,do_sample=do_sample, max_length=100, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty,num_return_sequences=num_return_sequences, num_beams = num_beams)
97
- outputs = ""
98
- for output_id in output_ids:
99
- output = tokenizer.decode(output_id, skip_special_tokens=True)
100
- outputs=outputs+output+"\\n"
101
- return outputs
102
 
103
  def create_response_original(input_str,
104
  num_beams,
@@ -196,45 +196,45 @@ interface1 = gr.Interface(fn=create_response_original,
196
  ], outputs=[gr.Textbox(label="output response", lines=30)])
197
 
198
 
199
- interface2 = gr.Interface(fn=create_response_untethered,
200
- title="untethered",
201
- description="untethered fine tuning",
202
- examples=[
203
- ["What is death?",7,5,0.2,1.5,0.9,50,True], # The first example
204
- ["One of the best teachers in all of life turns out to be what?",7,5,0.2,1.5,0.9,50,True], # The second example
205
- ["what is your most meaningful relationship?",7,5,0.2,1.5,0.9,50,True], # The third example
206
- ["What actually gives life meaning?",7,5,0.2,1.5,0.9,50,True]
207
- ],
208
- inputs=[
209
- gr.Textbox(label="input text here", lines=3),
210
- gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " +
211
- "likely ones (specified in num_beams)", value=7),
212
- gr.Number(label="num_return_sequences (integer) the number of outputs selected from num_beams possible output",
213
- value=5),
214
- gr.Number(
215
- label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
216
- " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
217
- " makes the output more deterministic and focused",
218
- value=0.2),
219
- gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
220
- "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
221
- "in more varied and non-repetitive output.",
222
- value=1.5),
223
- gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
224
- " to reach a certain threshold",
225
- value=0.9),
226
- gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
227
- "This means that only the tokens with the highest probabilities are considered for sampling" +
228
- "This reduces the diversity of the generated sequences, "+
229
- "but also makes them more likely to be coherent and fluent.",
230
- value=50),
231
- gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
232
- "which means that it will select the word with the highest probability at each step. " +
233
- "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
234
- "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
235
- " select a word from the probability distribution at each step. This results in a more diverse and creative" +
236
- " output, but it might also introduce errors and inconsistencies ", value=True)
237
- ], outputs=[gr.Textbox(label="output response", lines=30)])
238
 
239
  interface3 = gr.Interface(fn=create_response_untethered_paraphrased,
240
  title="untethered paraphrased",
@@ -282,7 +282,8 @@ interface3 = gr.Interface(fn=create_response_untethered_paraphrased,
282
 
283
 
284
  #interface2 = gr.Interface(fn=create_response_fine_tuned, inputs="text", outputs="text", title="Fine Tuned")
285
- demo = gr.TabbedInterface([interface1, interface2, interface3], ["Original", "Untethered", "Untethered paraphrased"])
 
286
  # with gr.Blocks() as demo:
287
  # with gr.Row():
288
  #
 
9
  #generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")
10
  tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
11
  original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
12
+ #untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
13
  untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')
14
 
15
  def create_response_untethered_paraphrased(input_str,
 
57
 
58
 
59
 
60
+ # def create_response_untethered(input_str,
61
+ # num_beams,
62
+ # num_return_sequences,
63
+ # temperature,
64
+ # repetition_penalty,
65
+ # top_p,
66
+ # top_k,
67
+ # do_sample):
68
+ # print("input_str="+input_str)
69
+ # num_beams = int(num_beams)
70
+ # print("num_beams=" + str(num_beams))
71
+ # num_return_sequences=int(num_return_sequences)
72
+ # print("num_return_sequences" + str(num_return_sequences))
73
+ # print("top_p" + str(top_p))
74
+ # top_k=int(top_k)
75
+ # print("top_k" + str(top_k))
76
+ # print("repetition_penalty" + str(repetition_penalty))
77
+ # print("temperature" + str(temperature))
78
+ # print("do_sample" + str(do_sample))
79
+ # if not do_sample:
80
+ # num_beams = 1
81
+ # print("num_beams=" + str(num_beams))
82
 
83
+ # #output_raw= generator(input_str)
84
+ # """print (output_raw)"""
85
 
86
+ # #output_str = output_raw[0]['generated_text']
87
+ # #output_str = output_str.replace("\n", "")
88
+ # #output_str = output_str.replace(input_str, "")
89
+ # #output_str = tokenizer.decode(model.generate(**tokenizer("What are John West's hobbies?"+tokenizer.eos_token,return_tensors="pt",max_length=200))[0])
90
+ # # output_str = tokenizer.decode(original_model.generate(**tokenizer(input_str+tokenizer.eos_token,return_tensors="pt",max_length=200),
91
+ # # num_beams=num_beams,
92
+ # # num_return_sequences=num_return_sequences)[0])
93
 
94
+ # input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
95
+ # #output_ids = fine_tuned_model.generate(input_ids,do_sample=True, max_length=100, temperature=0.2, top_p=0.9, repetition_penalty=1.5,num_return_sequences=6)
96
+ # output_ids = untethered_model.generate(input_ids,do_sample=do_sample, max_length=100, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty,num_return_sequences=num_return_sequences, num_beams = num_beams)
97
+ # outputs = ""
98
+ # for output_id in output_ids:
99
+ # output = tokenizer.decode(output_id, skip_special_tokens=True)
100
+ # outputs=outputs+output+"\\n"
101
+ # return outputs
102
 
103
  def create_response_original(input_str,
104
  num_beams,
 
196
  ], outputs=[gr.Textbox(label="output response", lines=30)])
197
 
198
 
199
+ # interface2 = gr.Interface(fn=create_response_untethered,
200
+ # title="untethered",
201
+ # description="untethered fine tuning",
202
+ # examples=[
203
+ # ["What is death?",7,5,0.2,1.5,0.9,50,True], # The first example
204
+ # ["One of the best teachers in all of life turns out to be what?",7,5,0.2,1.5,0.9,50,True], # The second example
205
+ # ["what is your most meaningful relationship?",7,5,0.2,1.5,0.9,50,True], # The third example
206
+ # ["What actually gives life meaning?",7,5,0.2,1.5,0.9,50,True]
207
+ # ],
208
+ # inputs=[
209
+ # gr.Textbox(label="input text here", lines=3),
210
+ # gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " +
211
+ # "likely ones (specified in num_beams)", value=7),
212
+ # gr.Number(label="num_return_sequences (integer) the number of outputs selected from num_beams possible output",
213
+ # value=5),
214
+ # gr.Number(
215
+ # label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
216
+ # " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
217
+ # " makes the output more deterministic and focused",
218
+ # value=0.2),
219
+ # gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
220
+ # "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
221
+ # "in more varied and non-repetitive output.",
222
+ # value=1.5),
223
+ # gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
224
+ # " to reach a certain threshold",
225
+ # value=0.9),
226
+ # gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
227
+ # "This means that only the tokens with the highest probabilities are considered for sampling" +
228
+ # "This reduces the diversity of the generated sequences, "+
229
+ # "but also makes them more likely to be coherent and fluent.",
230
+ # value=50),
231
+ # gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
232
+ # "which means that it will select the word with the highest probability at each step. " +
233
+ # "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
234
+ # "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
235
+ # " select a word from the probability distribution at each step. This results in a more diverse and creative" +
236
+ # " output, but it might also introduce errors and inconsistencies ", value=True)
237
+ # ], outputs=[gr.Textbox(label="output response", lines=30)])
238
 
239
  interface3 = gr.Interface(fn=create_response_untethered_paraphrased,
240
  title="untethered paraphrased",
 
282
 
283
 
284
  #interface2 = gr.Interface(fn=create_response_fine_tuned, inputs="text", outputs="text", title="Fine Tuned")
285
+ #demo = gr.TabbedInterface([interface1, interface2, interface3], ["Original", "Untethered", "Untethered paraphrased"])
286
+ demo = gr.TabbedInterface([interface1, interface3], ["Original", "Untethered paraphrased"])
287
  # with gr.Blocks() as demo:
288
  # with gr.Row():
289
  #