zmbfeng committed on
Commit
2376ae8
·
verified ·
1 Parent(s): 9c6e8e6

use default values

Browse files
Files changed (1) hide show
  1. app.py +45 -34
app.py CHANGED
@@ -13,14 +13,19 @@ tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
13
  original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
14
  untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
15
  untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')
16
-
 
 
 
 
 
17
  def create_response(input_str,
18
  # num_beams,
19
  num_return_sequences,
20
  temperature,
21
  repetition_penalty,
22
  top_p,
23
- # top_k,
24
  do_sample,
25
  model_name):
26
  print("input_str="+input_str)
@@ -59,12 +64,18 @@ def create_response(input_str,
59
  outputs=outputs+output+"<br/>"
60
  return outputs
61
 
 
 
 
 
 
 
62
 
63
  common_examples=[
64
- ["What is death?",5,0.2,1.5,0.9,True], # The first example
65
- ["One of the best teachers in all of life turns out to be what?",5,0.2,1.5,0.9,True], # The second example
66
- ["what is your most meaningful relationship?",5,0.2,1.5,0.9,True], # The third example
67
- ["What actually gives life meaning?",5,0.2,1.5,0.9,True]
68
  ]
69
  examples = copy.deepcopy(common_examples)
70
  print(examples)
@@ -85,25 +96,25 @@ interface_original = gr.Interface(fn=create_response,
85
  label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
86
  " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
87
  " makes the output more deterministic and focused",
88
- value=0.2),
89
  gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
90
  "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
91
  "in more varied and non-repetitive output.",
92
- value=1.5),
93
  gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
94
  " to reach a certain threshold",
95
- value=0.9),
96
- # gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
97
- # "This means that only the tokens with the highest probabilities are considered for sampling" +
98
- # "This reduces the diversity of the generated sequences, "+
99
- # "but also makes them more likely to be coherent and fluent.",
100
- # value=50),
101
  gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
102
  "which means that it will select the word with the highest probability at each step. " +
103
  "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
104
  "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
105
  " select a word from the probability distribution at each step. This results in a more diverse and creative" +
106
- " output, but it might also introduce errors and inconsistencies ", value=True),
107
  gr.Textbox(label="model", lines=3, value="original_model",visible=False)
108
  ],
109
  outputs="html"
@@ -129,25 +140,25 @@ interface_untethered_model = gr.Interface(fn=create_response,
129
  label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
130
  " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
131
  " makes the output more deterministic and focused",
132
- value=0.2),
133
  gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
134
  "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
135
  "in more varied and non-repetitive output.",
136
- value=1.5),
137
  gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
138
  " to reach a certain threshold",
139
- value=0.9),
140
- # gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
141
- # "This means that only the tokens with the highest probabilities are considered for sampling" +
142
- # "This reduces the diversity of the generated sequences, "+
143
- # "but also makes them more likely to be coherent and fluent.",
144
- # value=50),
145
  gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
146
  "which means that it will select the word with the highest probability at each step. " +
147
  "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
148
  "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
149
  " select a word from the probability distribution at each step. This results in a more diverse and creative" +
150
- " output, but it might also introduce errors and inconsistencies ", value=True),
151
  gr.Textbox(label="model", lines=3, value="untethered_model",visible=False)
152
  ],
153
  outputs="html"
@@ -163,7 +174,7 @@ interface_untethered_paraphrased_model = gr.Interface(fn=create_response,
163
  description="language model fine tuned with'The Untethered Soul' chapter 17 paraphrased",
164
  examples=examples,
165
  inputs=[
166
- gr.Textbox(label="input text here", lines=3),
167
  # gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " +
168
  # "likely ones (specified in num_beams)", value=7),
169
  gr.Number(label="num_return_sequences (integer) the number of outputs selected from num_beams possible output",
@@ -172,25 +183,25 @@ interface_untethered_paraphrased_model = gr.Interface(fn=create_response,
172
  label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
173
  " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
174
  " makes the output more deterministic and focused",
175
- value=0.2),
176
  gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
177
  "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
178
  "in more varied and non-repetitive output.",
179
- value=1.5),
180
  gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
181
  " to reach a certain threshold",
182
- value=0.9),
183
- # gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
184
- # "This means that only the tokens with the highest probabilities are considered for sampling" +
185
- # "This reduces the diversity of the generated sequences, "+
186
- # "but also makes them more likely to be coherent and fluent.",
187
- # value=50),
188
  gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
189
  "which means that it will select the word with the highest probability at each step. " +
190
  "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
191
  "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
192
  " select a word from the probability distribution at each step. This results in a more diverse and creative" +
193
- " output, but it might also introduce errors and inconsistencies ", value=True),
194
  gr.Textbox(label="model", lines=3, value="untethered_paraphrased_model",visible=False)
195
  ],
196
  outputs= "html"
 
13
  original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
14
  untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
15
  untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')
16
+ default_num_return_sequences=5
17
+ default_temperature=0.5
18
+ default_repetition_penalty=1.5
19
+ default_top_p=1.9
20
+ default_top_k=50
21
+ default_do_sample=True
22
  def create_response(input_str,
23
  # num_beams,
24
  num_return_sequences,
25
  temperature,
26
  repetition_penalty,
27
  top_p,
28
+ top_k,
29
  do_sample,
30
  model_name):
31
  print("input_str="+input_str)
 
64
  outputs=outputs+output+"<br/>"
65
  return outputs
66
 
67
+ default_num_return_sequences=5
68
+ default_temperature=0.5
69
+ default_repetition_penalty=1.5
70
+ default_top_p=1.9
71
+ default_top_k=50
72
+ default_do_sample=True
73
 
74
  common_examples=[
75
+ ["What is death?",default_num_return_sequences,default_temperature,default_repetition_penalty,default_top_p,default_top_k,default_do_sample], # The first example
76
+ ["One of the best teachers in all of life turns out to be what?",default_num_return_sequences,default_temperature,default_repetition_penalty,default_top_p,default_top_k,default_do_sample], # The second example
77
+ ["what is your most meaningful relationship?",default_num_return_sequences,default_temperature,default_repetition_penalty,default_top_p,default_top_k,default_do_sample], # The third example
78
+ ["What actually gives life meaning?",default_num_return_sequences,default_temperature,default_repetition_penalty,default_top_p,default_top_k,default_do_sample]
79
  ]
80
  examples = copy.deepcopy(common_examples)
81
  print(examples)
 
96
  label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
97
  " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
98
  " makes the output more deterministic and focused",
99
+ value=default_num_return_sequences),
100
  gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
101
  "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
102
  "in more varied and non-repetitive output.",
103
+ value=default_repetition_penalty),
104
  gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
105
  " to reach a certain threshold",
106
+ value=default_top_p),
107
+ gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
108
+ "This means that only the tokens with the highest probabilities are considered for sampling" +
109
+ "This reduces the diversity of the generated sequences, "+
110
+ "but also makes them more likely to be coherent and fluent.",
111
+ value=default_top_k),
112
  gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
113
  "which means that it will select the word with the highest probability at each step. " +
114
  "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
115
  "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
116
  " select a word from the probability distribution at each step. This results in a more diverse and creative" +
117
+ " output, but it might also introduce errors and inconsistencies ", value=default_do_sample),
118
  gr.Textbox(label="model", lines=3, value="original_model",visible=False)
119
  ],
120
  outputs="html"
 
140
  label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
141
  " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
142
  " makes the output more deterministic and focused",
143
+ value=default_num_return_sequences),
144
  gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
145
  "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
146
  "in more varied and non-repetitive output.",
147
+ value=default_repetition_penalty),
148
  gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
149
  " to reach a certain threshold",
150
+ value=default_top_p),
151
+ gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
152
+ "This means that only the tokens with the highest probabilities are considered for sampling" +
153
+ "This reduces the diversity of the generated sequences, "+
154
+ "but also makes them more likely to be coherent and fluent.",
155
+ value=default_top_k),
156
  gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
157
  "which means that it will select the word with the highest probability at each step. " +
158
  "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
159
  "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
160
  " select a word from the probability distribution at each step. This results in a more diverse and creative" +
161
+ " output, but it might also introduce errors and inconsistencies ", value=default_do_sample),
162
  gr.Textbox(label="model", lines=3, value="untethered_model",visible=False)
163
  ],
164
  outputs="html"
 
174
  description="language model fine tuned with'The Untethered Soul' chapter 17 paraphrased",
175
  examples=examples,
176
  inputs=[
177
+ gr.Textbox(label="input text here", lines=3),
178
  # gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " +
179
  # "likely ones (specified in num_beams)", value=7),
180
  gr.Number(label="num_return_sequences (integer) the number of outputs selected from num_beams possible output",
 
183
  label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
184
  " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
185
  " makes the output more deterministic and focused",
186
+ value=default_num_return_sequences),
187
  gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
188
  "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
189
  "in more varied and non-repetitive output.",
190
+ value=default_repetition_penalty),
191
  gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
192
  " to reach a certain threshold",
193
+ value=default_top_p),
194
+ gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
195
+ "This means that only the tokens with the highest probabilities are considered for sampling" +
196
+ "This reduces the diversity of the generated sequences, "+
197
+ "but also makes them more likely to be coherent and fluent.",
198
+ value=default_top_k),
199
  gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
200
  "which means that it will select the word with the highest probability at each step. " +
201
  "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
202
  "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
203
  " select a word from the probability distribution at each step. This results in a more diverse and creative" +
204
+ " output, but it might also introduce errors and inconsistencies ", value=default_do_sample),
205
  gr.Textbox(label="model", lines=3, value="untethered_paraphrased_model",visible=False)
206
  ],
207
  outputs= "html"