import os

import gradio as gr
from huggingface_hub import login
from transformers import GPT2LMHeadModel, GPT2Tokenizer

login(os.environ["HF_TOKEN"])

tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
fine_tuned_model = GPT2LMHeadModel.from_pretrained('zmbfeng/FineTune-1')


def create_response_original(input_str, num_beams, num_return_sequences,
                             temperature, repetition_penalty, top_p, top_k,
                             do_sample):
    print("num_beams=" + str(num_beams))
    print("num_return_sequences=" + str(num_return_sequences))
    print("top_p=" + str(top_p))
    print("top_k=" + str(top_k))
    print("repetition_penalty=" + str(repetition_penalty))
    print("temperature=" + str(temperature))
    print("do_sample=" + str(do_sample))
    # DialoGPT expects the end-of-sequence token appended to the prompt.
    input_ids = tokenizer.encode(input_str + tokenizer.eos_token,
                                 return_tensors="pt")
    # Use the original (not fine-tuned) model here, and pass the UI settings
    # through instead of hard-coding them. gr.Number returns floats, so the
    # integer-valued arguments are cast explicitly.
    output_ids = original_model.generate(input_ids,
                                         do_sample=do_sample,
                                         max_length=100,
                                         num_beams=int(num_beams),
                                         num_return_sequences=int(num_return_sequences),
                                         temperature=temperature,
                                         repetition_penalty=repetition_penalty,
                                         top_p=top_p,
                                         top_k=int(top_k),
                                         pad_token_id=tokenizer.eos_token_id)
    outputs = []
    for output_id in output_ids:
        outputs.append(tokenizer.decode(output_id, skip_special_tokens=True))
    return outputs


def create_response_fine_tuned(input_str):
    output_str = tokenizer.decode(
        fine_tuned_model.generate(
            **tokenizer(input_str + tokenizer.eos_token,
                        return_tensors="pt", truncation=True, max_length=200),
            pad_token_id=tokenizer.eos_token_id)[0])
    return output_str
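
# --- Aside: multi-turn conversation (sketch) ---
# Both functions above treat each prompt as a single turn. DialoGPT also
# supports multi-turn dialogue by concatenating the history, separated by
# eos_token, following the pattern from the DialoGPT model card. A minimal,
# commented-out sketch (assumes the history stays within the context window):
#
# import torch
# chat_history_ids = None
# for user_text in ["Hi!", "What are your hobbies?"]:
#     new_ids = tokenizer.encode(user_text + tokenizer.eos_token,
#                                return_tensors="pt")
#     bot_input_ids = (new_ids if chat_history_ids is None
#                      else torch.cat([chat_history_ids, new_ids], dim=-1))
#     chat_history_ids = original_model.generate(
#         bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
#     print(tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0],
#                            skip_special_tokens=True))
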
interface1 = gr.Interface(
    fn=create_response_original,
    title="original",
    description="original language model, no fine tuning",
    # Each example row supplies a value for every input component.
    examples=[
        ["What is death?", 7, 5, 0.2, 1.5, 0.9, 50, True],
        ["One of the best teachers in all of life turns out to be what?", 7, 5, 0.2, 1.5, 0.9, 50, True],
        ["what is your most meaningful relationship?", 7, 5, 0.2, 1.5, 0.9, 50, True],
        ["What actually gives life meaning?", 7, 5, 0.2, 1.5, 0.9, 50, True]
    ],
    inputs=[
        gr.Textbox(label="input text here", lines=3),
        gr.Number(label="num_beams (integer): explores the specified number of "
                        "candidate outputs and selects the most likely ones",
                  value=7),
        gr.Number(label="num_return_sequences (integer): the number of outputs "
                        "selected from the num_beams candidates",
                  value=5),
        gr.Number(label="temperature (decimal): controls the creativity or "
                        "randomness of the output. A higher temperature "
                        "(e.g., 0.9) gives more diverse and creative output, "
                        "while a lower temperature (e.g., 0.2) makes the "
                        "output more deterministic and focused",
                  value=0.2),
        gr.Number(label="repetition_penalty (decimal): penalizes words that "
                        "have already appeared in the output, making them "
                        "less likely to be generated again. A higher "
                        "repetition_penalty (e.g., 1.5) gives more varied, "
                        "less repetitive output",
                  value=1.5),
        gr.Number(label="top_p (decimal): only the smallest set of words whose "
                        "cumulative probability reaches this threshold is "
                        "considered for sampling",
                  value=0.9),
        gr.Number(label="top_k (integer): only the top_k highest-probability "
                        "vocabulary words are considered for sampling. This "
                        "reduces the diversity of the generated sequences but "
                        "makes them more likely to be coherent and fluent",
                  value=50),
        gr.Checkbox(label="do_sample: if False, num_return_sequences must be 1 "
                          "because generate() uses greedy decoding, selecting "
                          "the highest-probability word at each step; the "
                          "output is deterministic and fluent but may lack "
                          "diversity and creativity. If True, generate() uses "
                          "stochastic sampling, randomly drawing a word from "
                          "the probability distribution at each step; the "
                          "output is more diverse and creative but may contain "
                          "errors and inconsistencies",
                    value=True)
    ],
    outputs="list")

interface2 = gr.Interface(fn=create_response_fine_tuned, inputs="text",
                          outputs="text", title="Fine Tuned")

demo = gr.TabbedInterface([interface1, interface2], ["Original", "Fine Tuned"])

demo.launch()
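
# --- Optional: calling the running app programmatically (sketch) ---
# A minimal, commented-out sketch using gradio_client from a separate process.
# The URL assumes gradio's default local address, and the api_name may differ
# (check the app's "Use via API" page for the exact endpoint names).
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860/")
# result = client.predict(
#     "What is death?",  # input text
#     7,     # num_beams
#     5,     # num_return_sequences
#     0.2,   # temperature
#     1.5,   # repetition_penalty
#     0.9,   # top_p
#     50,    # top_k
#     True,  # do_sample
#     api_name="/predict",
# )
# print(result)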