# testchatbot / app.py
import gradio as gr
import os

from huggingface_hub import login
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Authenticate with the Hugging Face Hub so the fine-tuned checkpoints below
# can be downloaded; HF_TOKEN must be set in the environment (or as a Space secret).
login(os.environ["HF_TOKEN"])
tokenizer = GPT2Tokenizer.from_pretrained('microsoft/DialoGPT-medium')
original_model = GPT2LMHeadModel.from_pretrained('microsoft/DialoGPT-medium')
untethered_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240225_epochs_500')
untethered_paraphrased_model = GPT2LMHeadModel.from_pretrained('zmbfeng/untethered_20240227_epochs_350')
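
# DialoGPT treats a conversation as a single token stream in which every turn
# ends with the EOS token, which is why the functions below append
# tokenizer.eos_token to the prompt. A minimal single-turn exchange
# (illustrative only) looks like:
#   ids = tokenizer.encode("Hello!" + tokenizer.eos_token, return_tensors="pt")
#   reply = original_model.generate(ids, pad_token_id=tokenizer.eos_token_id, max_length=100)
# Note that generate() returns the prompt followed by the reply, so the decoded
# text echoes the input unless the prompt tokens are sliced off.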
def generate_responses(model, input_str, num_return_sequences, temperature,
                       repetition_penalty, top_p, do_sample, separator):
    """Generate num_return_sequences replies from `model` and join them with `separator`."""
    print("input_str=" + input_str)
    num_return_sequences = int(num_return_sequences)
    if not do_sample:
        # Greedy decoding is deterministic, so only one distinct sequence exists.
        num_return_sequences = 1
    print("num_return_sequences=" + str(num_return_sequences))
    print("temperature=" + str(temperature) + " top_p=" + str(top_p) +
          " repetition_penalty=" + str(repetition_penalty) + " do_sample=" + str(do_sample))
    # DialoGPT expects the prompt to end with the EOS token. Passing the
    # attention mask and pad_token_id explicitly avoids the warnings GPT-2
    # models otherwise emit, since GPT-2 has no dedicated pad token.
    encoded = tokenizer.encode_plus(input_str + tokenizer.eos_token, return_tensors="pt")
    output_ids = model.generate(encoded["input_ids"],
                                attention_mask=encoded["attention_mask"],
                                pad_token_id=tokenizer.eos_token_id,
                                do_sample=do_sample,
                                max_length=100,
                                temperature=temperature,
                                top_p=top_p,
                                repetition_penalty=repetition_penalty,
                                num_return_sequences=num_return_sequences)
    outputs = ""
    for output_id in output_ids:
        outputs = outputs + tokenizer.decode(output_id, skip_special_tokens=True) + separator
    return outputs


def create_response_untethered_paraphrased(input_str,
                                           num_return_sequences,
                                           temperature,
                                           repetition_penalty,
                                           top_p,
                                           do_sample):
    # This tab renders its output as HTML, so replies are separated with <br/>.
    return generate_responses(untethered_paraphrased_model, input_str,
                              num_return_sequences, temperature,
                              repetition_penalty, top_p, do_sample, "<br/>")
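
# A minimal sketch of exercising the shared helper directly, outside of Gradio
# (assumes the checkpoints above loaded successfully):
#   print(generate_responses(original_model, "What is death?", 2, 0.7, 1.5, 0.9,
#                            do_sample=True, separator="\n"))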
def create_response_untethered(input_str,
                               num_return_sequences,
                               temperature,
                               repetition_penalty,
                               top_p,
                               do_sample,
                               model_name):
    # model_name comes from an informational textbox in the UI; the model used
    # here is always the untethered checkpoint loaded at startup.
    print("model_name=" + str(model_name))
    return generate_responses(untethered_model, input_str, num_return_sequences,
                              temperature, repetition_penalty, top_p, do_sample, "\n")
def create_response_original(input_str,
                             num_return_sequences,
                             temperature,
                             repetition_penalty,
                             top_p,
                             do_sample):
    return generate_responses(original_model, input_str, num_return_sequences,
                              temperature, repetition_penalty, top_p, do_sample, "\n")
def create_response_fine_tuned(input_str, model=untethered_model):
    # Single-reply helper, currently not wired into the UI. The default model
    # is an assumption: the fine-tuned checkpoint this helper originally
    # targeted is not loaded in this file.
    encoded = tokenizer(input_str + tokenizer.eos_token, return_tensors="pt",
                        max_length=200, truncation=True)
    output_ids = model.generate(**encoded, pad_token_id=tokenizer.eos_token_id,
                                max_length=200)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
interface1 = gr.Interface(fn=create_response_original,
                          title="original",
                          description="original language model, no fine tuning",
                          examples=[
                              ["What is death?", 5, 0.2, 1.5, 0.9, True],
                              ["One of the best teachers in all of life turns out to be what?", 5, 0.2, 1.5, 0.9, True],
                              ["What is your most meaningful relationship?", 5, 0.2, 1.5, 0.9, True],
                              ["What actually gives life meaning?", 5, 0.2, 1.5, 0.9, True]
                          ],
                          inputs=[
                              gr.Textbox(label="input text here", lines=3),
                              gr.Number(label="num_return_sequences (integer): the number of responses to generate",
                                        value=5),
                              gr.Number(label="temperature (decimal): controls the creativity or randomness of the output."
                                              " A higher temperature (e.g., 0.9) produces more diverse and creative output,"
                                              " while a lower temperature (e.g., 0.2) makes the output more deterministic and focused",
                                        value=0.2),
                              gr.Number(label="repetition_penalty (decimal): penalizes words that have already appeared in the"
                                              " output, making them less likely to be generated again. A higher repetition_penalty"
                                              " (e.g., 1.5) produces more varied, less repetitive output",
                                        value=1.5),
                              gr.Number(label="top_p (decimal): nucleus sampling; only the smallest set of words whose cumulative"
                                              " probability reaches top_p is considered at each step",
                                        value=0.9),
                              gr.Checkbox(label="do_sample: if False, generation uses greedy decoding, which picks the single most"
                                                " likely word at each step, so only one deterministic response is produced"
                                                " (num_return_sequences is forced to 1). If True, generation samples from the"
                                                " probability distribution at each step, which yields more diverse and creative"
                                                " output but may introduce errors and inconsistencies",
                                          value=True)
                          ],
                          outputs=[gr.Textbox(label="output response", lines=30)])
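
# The example rows above map positionally onto the inputs list:
#   ("What is death?", 5, 0.2, 1.5, 0.9, True)
#   -> input text, num_return_sequences, temperature, repetition_penalty, top_p, do_sample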
interface2 = gr.Interface(fn=create_response_untethered,
                          title="untethered",
                          description="untethered fine tuning",
                          examples=[
                              ["What is death?", 5, 0.2, 1.5, 0.9, True, "untethered_model_name"],
                              ["One of the best teachers in all of life turns out to be what?", 5, 0.2, 1.5, 0.9, True, "untethered_model_name"],
                              ["What is your most meaningful relationship?", 5, 0.2, 1.5, 0.9, True, "untethered_model_name"],
                              ["What actually gives life meaning?", 5, 0.2, 1.5, 0.9, True, "untethered_model_name"]
                          ],
                          inputs=[
                              gr.Textbox(label="input text here", lines=3),
                              gr.Number(label="num_return_sequences (integer): the number of responses to generate",
                                        value=5),
                              gr.Number(label="temperature (decimal): controls the creativity or randomness of the output."
                                              " A higher temperature (e.g., 0.9) produces more diverse and creative output,"
                                              " while a lower temperature (e.g., 0.2) makes the output more deterministic and focused",
                                        value=0.2),
                              gr.Number(label="repetition_penalty (decimal): penalizes words that have already appeared in the"
                                              " output, making them less likely to be generated again. A higher repetition_penalty"
                                              " (e.g., 1.5) produces more varied, less repetitive output",
                                        value=1.5),
                              gr.Number(label="top_p (decimal): nucleus sampling; only the smallest set of words whose cumulative"
                                              " probability reaches top_p is considered at each step",
                                        value=0.9),
                              gr.Checkbox(label="do_sample: if False, generation uses greedy decoding, which picks the single most"
                                                " likely word at each step, so only one deterministic response is produced"
                                                " (num_return_sequences is forced to 1). If True, generation samples from the"
                                                " probability distribution at each step, which yields more diverse and creative"
                                                " output but may introduce errors and inconsistencies",
                                          value=True),
                              gr.Textbox(label="model", lines=3, value="untethered_model_name")
                          ],
                          outputs=[gr.Textbox(label="output response", lines=30)])
interface3 = gr.Interface(fn=create_response_untethered_paraphrased,
                          title="untethered paraphrased",
                          description="untethered paraphrased fine tuning",
                          examples=[
                              ["What is death?", 5, 0.2, 1.5, 0.9, True],
                              ["One of the best teachers in all of life turns out to be what?", 5, 0.2, 1.5, 0.9, True],
                              ["What is your most meaningful relationship?", 5, 0.2, 1.5, 0.9, True],
                              ["What actually gives life meaning?", 5, 0.2, 1.5, 0.9, True]
                          ],
                          inputs=[
                              gr.Textbox(label="input text here", lines=3),
                              gr.Number(label="num_return_sequences (integer): the number of responses to generate",
                                        value=5),
                              gr.Number(label="temperature (decimal): controls the creativity or randomness of the output."
                                              " A higher temperature (e.g., 0.9) produces more diverse and creative output,"
                                              " while a lower temperature (e.g., 0.2) makes the output more deterministic and focused",
                                        value=0.2),
                              gr.Number(label="repetition_penalty (decimal): penalizes words that have already appeared in the"
                                              " output, making them less likely to be generated again. A higher repetition_penalty"
                                              " (e.g., 1.5) produces more varied, less repetitive output",
                                        value=1.5),
                              gr.Number(label="top_p (decimal): nucleus sampling; only the smallest set of words whose cumulative"
                                              " probability reaches top_p is considered at each step",
                                        value=0.9),
                              gr.Checkbox(label="do_sample: if False, generation uses greedy decoding, which picks the single most"
                                                " likely word at each step, so only one deterministic response is produced"
                                                " (num_return_sequences is forced to 1). If True, generation samples from the"
                                                " probability distribution at each step, which yields more diverse and creative"
                                                " output but may introduce errors and inconsistencies",
                                          value=True)
                          ],
                          outputs="html")  # rendered as HTML so the <br/> separators display as line breaks
demo = gr.TabbedInterface([interface1, interface2, interface3], ["Original", "Untethered", "Untethered paraphrased"])
demo.launch()
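
# On Hugging Face Spaces the plain launch() above is sufficient; for a public
# link from a local run, Gradio's launch(share=True) option can be used instead.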