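# Gradio demo comparing the base DialoGPT-medium model with two "untethered"
# fine-tuned checkpoints. Each tab exposes the same generation controls
# (num_return_sequences, temperature, repetition_penalty, top_p, do_sample).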
import os

import gradio as gr
from huggingface_hub import login
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Log in to the Hugging Face Hub (required if the fine-tuned checkpoints are private).
login(os.environ["HF_TOKEN"])

# All three models share DialoGPT-medium's tokenizer.
tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')

def create_response_untethered_paraphrased(input_str,
                                           num_return_sequences,
                                           temperature,
                                           repetition_penalty,
                                           top_p,
                                           do_sample):
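  """Generate replies with the untethered, paraphrase-fine-tuned model.

  Decoded sequences are joined with <br/> tags because the third tab
  renders its output as HTML.
  """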
  print("input_str="+input_str)
  # num_beams = int(num_beams)
  # print("num_beams=" + str(num_beams))
  num_return_sequences=int(num_return_sequences)                               
  print("num_return_sequences" + str(num_return_sequences))
  print("top_p" + str(top_p))
  # top_k=int(top_k)
  #print("top_k" + str(top_k))
  print("repetition_penalty" + str(repetition_penalty))
  print("temperature" + str(temperature))
  print("do_sample" + str(do_sample))
  if not do_sample:
      num_beams = 1
      print("num_beams=" + str(num_beams))
      
  #output_raw= generator(input_str)
  """print (output_raw)"""

  #output_str = output_raw[0]['generated_text']
  #output_str = output_str.replace("\n", "")
  #output_str = output_str.replace(input_str, "")
  #output_str = tokenizer.decode(model.generate(**tokenizer("What are John West's hobbies?"+tokenizer.eos_token,return_tensors="pt",max_length=200))[0])
  # output_str = tokenizer.decode(original_model.generate(**tokenizer(input_str+tokenizer.eos_token,return_tensors="pt",max_length=200),
  #                                                      num_beams=num_beams,
  #                                                      num_return_sequences=num_return_sequences)[0])

  #input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
  encoded = tokenizer.encode_plus(input_str + tokenizer.eos_token, return_tensors="pt")
  input_ids = encoded["input_ids"]
  attention_mask = encoded["attention_mask"]
  #output_ids = fine_tuned_model.generate(input_ids,do_sample=True, max_length=100, temperature=0.2, top_p=0.9, repetition_penalty=1.5,num_return_sequences=6)                                
  #output_ids = untethered_paraphrased_model.generate(input_ids,do_sample=do_sample, max_length=100, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty,num_return_sequences=num_return_sequences, num_beams = num_beams)                                
  output_ids = untethered_paraphrased_model.generate(input_ids,pad_token_id=tokenizer.eos_token_id,do_sample=True,attention_mask=attention_mask,  max_length=100, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty,num_return_sequences=num_return_sequences)
  outputs = ""                                
  for output_id in output_ids:
    output = tokenizer.decode(output_id, skip_special_tokens=True)
    outputs=outputs+output+"<br/>"
  return outputs


                                 
def create_response_untethered(input_str,
                               num_return_sequences,
                               temperature,
                               repetition_penalty,
                               top_p,
                               do_sample,
                               model_name):
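  """Generate replies with the untethered fine-tuned model.

  model_name comes from a textbox in the UI and is only logged;
  generation always uses untethered_model.
  """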
  print("input_str="+input_str)
  print("model_name="+str(model_name))
  # num_beams = int(num_beams)
  # print("num_beams=" + str(num_beams))
  num_return_sequences=int(num_return_sequences)                               
  print("num_return_sequences" + str(num_return_sequences))
  print("top_p" + str(top_p))
  # top_k=int(top_k)
  # print("top_k" + str(top_k))
  print("repetition_penalty" + str(repetition_penalty))
  print("temperature" + str(temperature))
  print("do_sample" + str(do_sample))
  if not do_sample:
      num_beams = 1
      print("num_beams=" + str(num_beams))
      
  #output_raw= generator(input_str)
  """print (output_raw)"""

  #output_str = output_raw[0]['generated_text']
  #output_str = output_str.replace("\n", "")
  #output_str = output_str.replace(input_str, "")
  #output_str = tokenizer.decode(model.generate(**tokenizer("What are John West's hobbies?"+tokenizer.eos_token,return_tensors="pt",max_length=200))[0])
  # output_str = tokenizer.decode(original_model.generate(**tokenizer(input_str+tokenizer.eos_token,return_tensors="pt",max_length=200),
  #                                                      num_beams=num_beams,
  #                                                      num_return_sequences=num_return_sequences)[0])

  #input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
  encoded = tokenizer.encode_plus(input_str + tokenizer.eos_token, return_tensors="pt")
  input_ids = encoded["input_ids"]
  attention_mask = encoded["attention_mask"]


  #output_ids = fine_tuned_model.generate(input_ids,do_sample=True, max_length=100, temperature=0.2, top_p=0.9, repetition_penalty=1.5,num_return_sequences=6)                                
  output_ids = untethered_model.generate(input_ids,pad_token_id=tokenizer.eos_token_id,do_sample=do_sample, attention_mask=attention_mask, max_length=100, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty,num_return_sequences=num_return_sequences )                                
  outputs = ""                                
  for output_id in output_ids:
    output = tokenizer.decode(output_id, skip_special_tokens=True)
    outputs=outputs+output+"\\n"
  return outputs
                                 
def create_response_original(input_str,
                             num_return_sequences,
                             temperature,
                             repetition_penalty,
                             top_p,
                             do_sample):
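  """Generate replies with the original (not fine-tuned) DialoGPT-medium."""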
  print("input_str="+input_str)
  # num_beams = int(num_beams)
  # print("num_beams=" + str(num_beams))
  num_return_sequences=int(num_return_sequences)                               
  print("num_return_sequences" + str(num_return_sequences))
  print("top_p" + str(top_p))
  # top_k=int(top_k)
  # print("top_k" + str(top_k))
  print("repetition_penalty" + str(repetition_penalty))
  print("temperature" + str(temperature))
  print("do_sample" + str(do_sample))
  if not do_sample:
      num_beams = 1
      print("num_beams=" + str(num_beams))
      
  #output_raw= generator(input_str)
  """print (output_raw)"""

  #output_str = output_raw[0]['generated_text']
  #output_str = output_str.replace("\n", "")
  #output_str = output_str.replace(input_str, "")
  #output_str = tokenizer.decode(model.generate(**tokenizer("What are John West's hobbies?"+tokenizer.eos_token,return_tensors="pt",max_length=200))[0])
  # output_str = tokenizer.decode(original_model.generate(**tokenizer(input_str+tokenizer.eos_token,return_tensors="pt",max_length=200),
  #                                                      num_beams=num_beams,
  #                                                      num_return_sequences=num_return_sequences)[0])

  #input_ids = tokenizer.encode(input_str + tokenizer.eos_token, return_tensors="pt")
  encoded = tokenizer.encode_plus(input_str + tokenizer.eos_token, return_tensors="pt")
  input_ids = encoded["input_ids"]
  attention_mask = encoded["attention_mask"]
  #output_ids = fine_tuned_model.generate(input_ids,do_sample=True, max_length=100, temperature=0.2, top_p=0.9, repetition_penalty=1.5,num_return_sequences=6)                                
  output_ids = original_model.generate(input_ids,pad_token_id=tokenizer.eos_token_id,do_sample=do_sample,attention_mask=attention_mask,  max_length=100, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty,num_return_sequences=num_return_sequences)                                
  outputs = ""                                
  for output_id in output_ids:
    output = tokenizer.decode(output_id, skip_special_tokens=True)
    outputs=outputs+output+"\\n"
  return outputs

def create_response_fine_tuned(input_str):
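  """Legacy helper, currently unused: it references a `fine_tuned_model`
  that is never defined in this file, so calling it would raise a
  NameError. Kept for reference only.
  """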
  output_str = tokenizer.decode(
      fine_tuned_model.generate(**tokenizer(input_str + tokenizer.eos_token,
                                            return_tensors="pt", max_length=200))[0])
  return output_str
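
# --- UI ---------------------------------------------------------------
# One gr.Interface per model. All three expose the same sampling controls;
# interface2 adds a model-name textbox, and interface3 returns HTML so the
# <br/>-joined sequences render as separate lines.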


interface1 = gr.Interface(fn=create_response_original,
    title="original",
    description="original language model, no fine-tuning",
    examples=[
      ["What is death?", 5, 0.2, 1.5, 0.9, True],
      ["One of the best teachers in all of life turns out to be what?", 5, 0.2, 1.5, 0.9, True],
      ["what is your most meaningful relationship?", 5, 0.2, 1.5, 0.9, True],
      ["What actually gives life meaning?", 5, 0.2, 1.5, 0.9, True]
    ],
    inputs=[
        gr.Textbox(label="input text here", lines=3),
        gr.Number(label="num_return_sequences (integer): the number of generated sequences to return",
                  value=5),
        gr.Number(
            label="temperature (decimal): controls the creativity or randomness of the output. A higher " +
                  "temperature (e.g., 0.9) produces more diverse and creative output, while a lower " +
                  "temperature (e.g., 0.2) makes the output more deterministic and focused",
            value=0.2),
        gr.Number(label="repetition_penalty (decimal): penalizes words that have already appeared in the " +
                        "output, making them less likely to be generated again. A higher repetition_penalty " +
                        "(e.g., 1.5) results in more varied, less repetitive output.",
                  value=1.5),
        gr.Number(label="top_p (decimal): nucleus sampling; only the smallest set of words whose cumulative " +
                        "probability reaches this threshold is considered",
                  value=0.9),
        gr.Checkbox(label="do_sample. If set to False, generation uses greedy decoding, selecting the most " +
                          "probable word at each step; the output is deterministic and fluent but can lack " +
                          "diversity, and only one sequence is returned. If set to True, a word is sampled " +
                          "from the probability distribution at each step; the output is more diverse and " +
                          "creative but may contain errors and inconsistencies.", value=True)
    ],
    outputs=[gr.Textbox(label="output response", lines=30)])


interface2 = gr.Interface(fn=create_response_untethered,
    title="untethered",
    description="untethered fine-tuning",
    examples=[
      # Each row must supply one value per input, including the model textbox.
      ["What is death?", 5, 0.2, 1.5, 0.9, True, "untethered_model_name"],
      ["One of the best teachers in all of life turns out to be what?", 5, 0.2, 1.5, 0.9, True, "untethered_model_name"],
      ["what is your most meaningful relationship?", 5, 0.2, 1.5, 0.9, True, "untethered_model_name"],
      ["What actually gives life meaning?", 5, 0.2, 1.5, 0.9, True, "untethered_model_name"]
    ],
    inputs=[
        gr.Textbox(label="input text here", lines=3),
        gr.Number(label="num_return_sequences (integer): the number of generated sequences to return",
                  value=5),
        gr.Number(
            label="temperature (decimal): controls the creativity or randomness of the output. A higher " +
                  "temperature (e.g., 0.9) produces more diverse and creative output, while a lower " +
                  "temperature (e.g., 0.2) makes the output more deterministic and focused",
            value=0.2),
        gr.Number(label="repetition_penalty (decimal): penalizes words that have already appeared in the " +
                        "output, making them less likely to be generated again. A higher repetition_penalty " +
                        "(e.g., 1.5) results in more varied, less repetitive output.",
                  value=1.5),
        gr.Number(label="top_p (decimal): nucleus sampling; only the smallest set of words whose cumulative " +
                        "probability reaches this threshold is considered",
                  value=0.9),
        gr.Checkbox(label="do_sample. If set to False, generation uses greedy decoding, selecting the most " +
                          "probable word at each step; the output is deterministic and fluent but can lack " +
                          "diversity, and only one sequence is returned. If set to True, a word is sampled " +
                          "from the probability distribution at each step; the output is more diverse and " +
                          "creative but may contain errors and inconsistencies.", value=True),
        gr.Textbox(label="model", lines=3, value="untethered_model_name")
    ],
    outputs=[gr.Textbox(label="output response", lines=30)])

interface3 = gr.Interface(fn=create_response_untethered_paraphrased,
    title="untethered paraphrased",
    description="untethered paraphrased fine-tuning",
    examples=[
      ["What is death?", 5, 0.2, 1.5, 0.9, True],
      ["One of the best teachers in all of life turns out to be what?", 5, 0.2, 1.5, 0.9, True],
      ["what is your most meaningful relationship?", 5, 0.2, 1.5, 0.9, True],
      ["What actually gives life meaning?", 5, 0.2, 1.5, 0.9, True]
    ],
    inputs=[
        gr.Textbox(label="input text here", lines=3),
        gr.Number(label="num_return_sequences (integer): the number of generated sequences to return",
                  value=5),
        gr.Number(
            label="temperature (decimal): controls the creativity or randomness of the output. A higher " +
                  "temperature (e.g., 0.9) produces more diverse and creative output, while a lower " +
                  "temperature (e.g., 0.2) makes the output more deterministic and focused",
            value=0.2),
        gr.Number(label="repetition_penalty (decimal): penalizes words that have already appeared in the " +
                        "output, making them less likely to be generated again. A higher repetition_penalty " +
                        "(e.g., 1.5) results in more varied, less repetitive output.",
                  value=1.5),
        gr.Number(label="top_p (decimal): nucleus sampling; only the smallest set of words whose cumulative " +
                        "probability reaches this threshold is considered",
                  value=0.9),
        gr.Checkbox(label="do_sample. If set to False, generation uses greedy decoding, selecting the most " +
                          "probable word at each step; the output is deterministic and fluent but can lack " +
                          "diversity, and only one sequence is returned. If set to True, a word is sampled " +
                          "from the probability distribution at each step; the output is more diverse and " +
                          "creative but may contain errors and inconsistencies.", value=True)
    ],
    # HTML output so the <br/>-separated sequences render as line breaks.
    outputs="html")






# Three tabs, one per model, sharing the same generation controls.
demo = gr.TabbedInterface([interface1, interface2, interface3],
                          ["Original", "Untethered", "Untethered paraphrased"])
demo.launch()