Spaces:

zmbfeng
/

testchatbot

Sleeping

App Files Files Community

zmbfeng committed on Mar 1, 2024

Commit

2376ae8

verified ·

1 Parent(s): 9c6e8e6

use default values

Browse files

Files changed (1) hide show

app.py +45 -34

app.py CHANGED Viewed

@@ -13,14 +13,19 @@ tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
 original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
 untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
 untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')
 def create_response(input_str,
                              # num_beams,
                              num_return_sequences,
                              temperature,
                              repetition_penalty,
                              top_p,
-                             # top_k,
                              do_sample,
                              model_name):
   print("input_str="+input_str)
@@ -59,12 +64,18 @@ def create_response(input_str,
     outputs=outputs+output+"<br/>"
   return outputs
 common_examples=[
-      ["What is death?",5,0.2,1.5,0.9,True], # The first example
-      ["One of the best teachers in all of life turns out to be what?",5,0.2,1.5,0.9,True], # The second example
-      ["what is your most meaningful relationship?",5,0.2,1.5,0.9,True], # The third example
-      ["What actually gives life meaning?",5,0.2,1.5,0.9,True]
     ]
 examples = copy.deepcopy(common_examples)
 print(examples)
@@ -85,25 +96,25 @@ interface_original = gr.Interface(fn=create_response,
             label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                   " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
                   " makes the output more deterministic and focused",
-            value=0.2),
         gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
                         "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
                         "in more varied and non-repetitive output.",
-                  value=1.5),
         gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
                         " to reach a certain threshold",
-                  value=0.9),
-        # gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
-        #                 "This means that only the tokens with the highest probabilities are considered for sampling" +
-        #                 "This reduces the diversity of the generated sequences, "+
-        #                 "but also makes them more likely to be coherent and fluent.",
-        #           value=50),
         gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
                           "which means that it will select the word with the highest probability at each step. " +
                           "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
                           "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
                           " select a word from the probability distribution at each step. This results in a more diverse and creative" +
-                          " output, but it might also introduce errors and inconsistencies ", value=True),
         gr.Textbox(label="model", lines=3, value="original_model",visible=False)
     ],
     outputs="html"
@@ -129,25 +140,25 @@ interface_untethered_model = gr.Interface(fn=create_response,
             label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                   " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
                   " makes the output more deterministic and focused",
-            value=0.2),
         gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
                         "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
                         "in more varied and non-repetitive output.",
-                  value=1.5),
         gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
                         " to reach a certain threshold",
-                  value=0.9),
-        # gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
-        #                 "This means that only the tokens with the highest probabilities are considered for sampling" +
-        #                 "This reduces the diversity of the generated sequences, "+
-        #                 "but also makes them more likely to be coherent and fluent.",
-        #           value=50),
         gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
                           "which means that it will select the word with the highest probability at each step. " +
                           "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
                           "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
                           " select a word from the probability distribution at each step. This results in a more diverse and creative" +
-                          " output, but it might also introduce errors and inconsistencies ", value=True),
         gr.Textbox(label="model", lines=3, value="untethered_model",visible=False)
     ],
     outputs="html"
@@ -163,7 +174,7 @@ interface_untethered_paraphrased_model = gr.Interface(fn=create_response,
     description="language model fine tuned with'The Untethered Soul' chapter 17 paraphrased",
     examples=examples,
     inputs=[
-    gr.Textbox(label="input text here", lines=3),
     # gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " +
     #             "likely ones (specified in num_beams)", value=7),
     gr.Number(label="num_return_sequences (integer)  the number of outputs selected from num_beams possible output",
@@ -172,25 +183,25 @@ interface_untethered_paraphrased_model = gr.Interface(fn=create_response,
             label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                   " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
                   " makes the output more deterministic and focused",
-            value=0.2),
         gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
                         "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
                         "in more varied and non-repetitive output.",
-                  value=1.5),
         gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
                         " to reach a certain threshold",
-                  value=0.9),
-        # gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered" +
-        #                 "This means that only the tokens with the highest probabilities are considered for sampling" +
-        #                 "This reduces the diversity of the generated sequences, "+
-        #                 "but also makes them more likely to be coherent and fluent.",
-        #           value=50),
         gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
                           "which means that it will select the word with the highest probability at each step. " +
                           "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
                           "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
                           " select a word from the probability distribution at each step. This results in a more diverse and creative" +
-                          " output, but it might also introduce errors and inconsistencies ", value=True),
         gr.Textbox(label="model", lines=3, value="untethered_paraphrased_model",visible=False)
     ],
     outputs= "html"

 original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
 untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
 untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')
+default_num_return_sequences=5
+default_temperature=0.5
+default_repetition_penalty=1.5
+default_top_p=0.9
+default_top_k=50
+default_do_sample=True
 def create_response(input_str,
                              # num_beams,
                              num_return_sequences,
                              temperature,
                              repetition_penalty,
                              top_p,
+                             top_k,
                              do_sample,
                              model_name):
   print("input_str="+input_str)
     outputs=outputs+output+"<br/>"
   return outputs
+default_num_return_sequences=5
+default_temperature=0.5
+default_repetition_penalty=1.5
+default_top_p=0.9
+default_top_k=50
+default_do_sample=True
 common_examples=[
+      ["What is death?",default_num_return_sequences,default_temperature,default_repetition_penalty,default_top_p,default_top_k,default_do_sample], # The first example
+      ["One of the best teachers in all of life turns out to be what?",default_num_return_sequences,default_temperature,default_repetition_penalty,default_top_p,default_top_k,default_do_sample], # The second example
+      ["what is your most meaningful relationship?",default_num_return_sequences,default_temperature,default_repetition_penalty,default_top_p,default_top_k,default_do_sample], # The third example
+      ["What actually gives life meaning?",default_num_return_sequences,default_temperature,default_repetition_penalty,default_top_p,default_top_k,default_do_sample]
     ]
 examples = copy.deepcopy(common_examples)
 print(examples)
             label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                   " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
                   " makes the output more deterministic and focused",
+            value=default_temperature),
         gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
                         "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
                         "in more varied and non-repetitive output.",
+                  value=default_repetition_penalty),
         gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
                         " to reach a certain threshold",
+                  value=default_top_p),
+        gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered. " +
+                        "This means that only the tokens with the highest probabilities are considered for sampling. " +
+                        "This reduces the diversity of the generated sequences, "+
+                        "but also makes them more likely to be coherent and fluent.",
+                  value=default_top_k),
         gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
                           "which means that it will select the word with the highest probability at each step. " +
                           "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
                           "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
                           " select a word from the probability distribution at each step. This results in a more diverse and creative" +
+                          " output, but it might also introduce errors and inconsistencies ", value=default_do_sample),
         gr.Textbox(label="model", lines=3, value="original_model",visible=False)
     ],
     outputs="html"
             label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                   " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
                   " makes the output more deterministic and focused",
+            value=default_temperature),
         gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
                         "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
                         "in more varied and non-repetitive output.",
+                  value=default_repetition_penalty),
         gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
                         " to reach a certain threshold",
+                  value=default_top_p),
+        gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered. " +
+                        "This means that only the tokens with the highest probabilities are considered for sampling. " +
+                        "This reduces the diversity of the generated sequences, "+
+                        "but also makes them more likely to be coherent and fluent.",
+                  value=default_top_k),
         gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
                           "which means that it will select the word with the highest probability at each step. " +
                           "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
                           "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
                           " select a word from the probability distribution at each step. This results in a more diverse and creative" +
+                          " output, but it might also introduce errors and inconsistencies ", value=default_do_sample),
         gr.Textbox(label="model", lines=3, value="untethered_model",visible=False)
     ],
     outputs="html"
     description="language model fine tuned with'The Untethered Soul' chapter 17 paraphrased",
     examples=examples,
     inputs=[
+   gr.Textbox(label="input text here", lines=3),
     # gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " +
     #             "likely ones (specified in num_beams)", value=7),
     gr.Number(label="num_return_sequences (integer)  the number of outputs selected from num_beams possible output",
             label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                   " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
                   " makes the output more deterministic and focused",
+            value=default_temperature),
         gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
                         "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
                         "in more varied and non-repetitive output.",
+                  value=default_repetition_penalty),
         gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
                         " to reach a certain threshold",
+                  value=default_top_p),
+        gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered. " +
+                        "This means that only the tokens with the highest probabilities are considered for sampling. " +
+                        "This reduces the diversity of the generated sequences, "+
+                        "but also makes them more likely to be coherent and fluent.",
+                  value=default_top_k),
         gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
                           "which means that it will select the word with the highest probability at each step. " +
                           "This results in a deterministic and fluent output, but it might also lack diversity and creativity" +
                           "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
                           " select a word from the probability distribution at each step. This results in a more diverse and creative" +
+                          " output, but it might also introduce errors and inconsistencies ", value=default_do_sample),
         gr.Textbox(label="model", lines=3, value="untethered_paraphrased_model",visible=False)
     ],
     outputs= "html"