import gradio as gr
import random
import os
import copy
import torch
from huggingface_hub import login
from transformers import pipeline
from transformers import GPT2Tokenizer, GPT2LMHeadModel, set_seed

login(os.environ["HF_TOKEN"])

# https://huggingface.co/facebook/opt-1.3b
# generator = pipeline('text-generation', model="microsoft/DialoGPT-medium")
tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')

# Dispatch table for the hidden "model" textbox value. Unknown names fall back
# to the original (un-fine-tuned) model, matching the old if/elif chain's
# else-branch.
MODELS = {
    "original_model": original_model,
    "untethered_model": untethered_model,
    "untethered_paraphrased_model": untethered_paraphrased_model,
}

default_num_return_sequences = 5
default_temperature = 0.5
default_repetition_penalty = 1.5
# BUG FIX: top_p is a cumulative probability mass and must lie in (0, 1];
# transformers rejects values > 1 at generate() time, so the old default of 2
# made every run with defaults fail.
default_top_p = 0.95
default_top_k = 50
default_do_sample = True
default_seed = 42


def create_response(input_str,
                    # num_beams,
                    num_return_sequences,
                    temperature,
                    repetition_penalty,
                    top_p,
                    top_k,
                    do_sample,
                    seed,
                    model_name):
    """Generate continuations of ``input_str`` and score each one.

    Args:
        input_str: user prompt; the tokenizer's EOS token is appended before
            encoding (DialoGPT single-turn convention).
        num_return_sequences: number of sequences to generate (coerced to
            int). Forced to 1 when ``do_sample`` is False, because greedy
            decoding can only yield a single distinct sequence and
            generate() raises otherwise.
        temperature, repetition_penalty, top_p, top_k: standard sampling
            knobs forwarded to ``generate()``; ``top_k`` is coerced to int.
        do_sample: stochastic sampling when True, greedy decoding when False.
        seed: RNG seed (coerced to int); -1 means "do not reseed", giving
            fresh randomness on every call.
        model_name: key into MODELS selecting which checkpoint generates;
            unknown names fall back to ``original_model``.

    Returns:
        A newline-separated string: a header naming the model, then one line
        per generated sequence with its mean transition score appended.
    """
    print("input_str=" + input_str)
    print("model_name=" + str(model_name))
    num_return_sequences = int(num_return_sequences)
    print("num_return_sequences" + str(num_return_sequences))
    print("top_p" + str(top_p))
    top_k = int(top_k)
    print("top_k" + str(top_k))
    print("repetition_penalty" + str(repetition_penalty))
    print("temperature" + str(temperature))
    print("do_sample" + str(do_sample))
    if not do_sample:
        # BUG FIX: generate() raises when num_return_sequences > 1 under
        # greedy decoding — the UI label already documents this constraint.
        # (The old code set a never-used `num_beams` here instead.)
        num_return_sequences = 1
        print("greedy decoding: forcing num_return_sequences=1")
    seed = int(seed)
    print("seed" + str(seed))

    encoded = tokenizer.encode_plus(input_str + tokenizer.eos_token, return_tensors="pt")
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]
    if seed != -1:
        set_seed(seed)

    model = MODELS.get(model_name, original_model)
    # BUG FIX: top_k was collected from the UI, converted and printed, but
    # never forwarded to generate(). attention_mask is now also passed
    # explicitly (it was computed and commented out before), which silences
    # the transformers warning about inferring it from pad tokens.
    output = model.generate(input_ids,
                            attention_mask=attention_mask,
                            do_sample=do_sample,
                            max_length=100,
                            temperature=temperature,
                            top_p=top_p,
                            top_k=top_k,
                            repetition_penalty=repetition_penalty,
                            num_return_sequences=num_return_sequences,
                            return_dict_in_generate=True,
                            output_scores=True)
    transition_scores = model.compute_transition_scores(output.sequences,
                                                        output.scores,
                                                        normalize_logits=False)

    # Average the raw per-token transition scores of each sequence. This is a
    # crude ranking signal, NOT a normalized log-probability
    # (normalize_logits=False above).
    score_list = []
    for scores in transition_scores:
        average_score = torch.sum(scores) / torch.numel(scores)
        print(average_score)
        score_list.append(average_score.item())

    outputs = model_name + " generated\n"
    for index, output_id in enumerate(output.sequences):
        sentence = tokenizer.decode(output_id, skip_special_tokens=True)
        outputs = outputs + sentence + " score:" + str(score_list[index]) + "\n"
    return outputs


common_examples_string = ("\nSample Inputs:\n"
                          "What is death?\n"
                          "One of the best teachers in all of life turns out to be what?\n"
                          "what is your most meaningful relationship?\n"
                          "What actually gives life meaning?\n")

# One row per sample question; each row carries the default knob values in the
# same order as the Interface inputs (the model name is appended per-interface
# inside build_interface).
common_examples = [
    [question,
     default_num_return_sequences,
     default_temperature,
     default_repetition_penalty,
     default_top_p,
     default_top_k,
     default_do_sample]
    for question in ("What is death?",
                     "One of the best teachers in all of life turns out to be what?",
                     "what is your most meaningful relationship?",
                     "What actually gives life meaning?")
]


def build_interface(title, description, model_name):
    """Build one gr.Interface tab bound to ``model_name``.

    The three tabs previously duplicated ~40 identical component definitions
    each (with drift already creeping in between copies); this helper is now
    the single source of truth for the shared layout. ``model_name`` is
    injected through a hidden Textbox so create_response can dispatch on it.
    """
    examples = copy.deepcopy(common_examples)
    for example in examples:
        example.append(model_name)
    print(examples)
    return gr.Interface(
        fn=create_response,
        title=title,
        description=description + common_examples_string,
        # examples=examples,  # NOTE(review): disabled in the original; rows still built above for easy re-enabling
        inputs=[
            gr.Textbox(label="input text here", lines=3),
            # gr.Number(label="num_beams (integer) explores the specified number of possible outputs and selects the most " +
            #                 "likely ones (specified in num_beams)", value=7),
            gr.Number(label="num_return_sequences (integer) the number of outputs selected from num_beams possible output",
                      value=default_num_return_sequences),
            gr.Number(label="temperature (decimal) controls the creativity or randomness of the output. A higher temperature" +
                            " (e.g., 0.9) results in more diverse and creative output, while a lower temperature (e.g., 0.2)" +
                            " makes the output more deterministic and focused",
                      value=default_temperature),
            gr.Number(label="repetition_penalty (decimal) penalizes words that have already appeared in the output, " +
                            "making them less likely to be generated again. A higher repetition_penalty (e.g., 1.5) results" +
                            " in more varied and non-repetitive output.",
                      value=default_repetition_penalty),
            gr.Number(label="top_p (decimal) the model will only consider the words that have a high enough probability" +
                            " to reach a certain threshold",
                      value=default_top_p),
            gr.Number(label="top_k (integer) The number of highest probability vocabulary word will be considered. " +
                            "This means that only the tokens with the highest probabilities are considered for sampling. " +
                            "This reduces the diversity of the generated sequences, " +
                            "but also makes them more likely to be coherent and fluent.",
                      value=default_top_k),
            gr.Checkbox(label="do_sample. If is set to False, num_return_sequences must be 1 because the generate function will use greedy decoding, " +
                              "which means that it will select the word with the highest probability at each step. " +
                              "This results in a deterministic and fluent output, but it might also lack diversity and creativity. " +
                              "If is set to True, the generate function will use stochastic sampling, which means that it will randomly" +
                              " select a word from the probability distribution at each step. This results in a more diverse and creative" +
                              " output, but it might also introduce errors and inconsistencies ",
                        value=default_do_sample),
            gr.Number(label="seed (integer) random seed, set to -1 to use a random seed everytime",
                      value=default_seed),
            gr.Textbox(label="model", lines=3, value=model_name, visible=False),
        ],
        outputs="html",
    )


interface_original = build_interface(
    "original",
    "original language model, no fine tuning",
    "original_model")
interface_untethered_model = build_interface(
    "untethered model",
    "language model fine tuned with 'The Untethered Soul' chapter 17",
    "untethered_model")
interface_untethered_paraphrased_model = build_interface(
    "untethered paraphrased_model",
    "language model fine tuned with 'The Untethered Soul' chapter 17 paraphrased",
    "untethered_paraphrased_model")

demo = gr.TabbedInterface(
    [interface_original, interface_untethered_model, interface_untethered_paraphrased_model],
    ["Original", "Untethered", "Untethered paraphrased"])
demo.launch()