#!/usr/bin/env python
# Run directly, or via: gradio app.py
import traceback

import gradio as gr

import chat_client
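
# WebSocket endpoint of the public Petals chat API; switch to the
# commented-out URL below to point the playground at a local server.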
CHAT_URL = 'ws://chat.petals.ml/api/v2/generate'
# CHAT_URL = 'ws://localhost:8000/api/v2/generate'


def generate(state, *args):
    # Save that we're in the generating loop
    state['generate'] = True
    try:
        for x in _generate(state, *args):
            yield x
    finally:
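        # Clear the flag even if the generator raises, so the UI can start a new run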
        state['generate'] = False


def _generate(state, prompt, model, endseq, max_length,
              do_sample, top_k, top_p, temperature,
              add_stoptoken, copy_output):
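    # Open an inference session on the Petals swarm for the selected model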
    try:
        client = chat_client.ModelClient(CHAT_URL)
        client.open_session(f"bigscience/{model}-petals", max_length)
    except Exception:
        print(traceback.format_exc())
        yield state, prompt, "Error: " + traceback.format_exc()
        return
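
    # Optionally close the prompt with the model's end-of-prompt marker:
    # </s> for bloomz, a blank line for the plain bloom models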
    if add_stoptoken:
        prompt += "</s>" if "bloomz" in model else "\n\n"

    # Translate checkbox items to actual stop sequences
    seq = []
    for s in endseq:
        if s == "\\n":
            seq.append("\n")
        elif s == "</s>":
            seq.append("</s>")
        elif s == "? (question mark)":
            seq.append("?")
        elif s == ". (dot)":
            seq.append(".")

    # Only one of top_k or top_p can be set; zero in the UI means "unset",
    # and top_p wins if both are given
    if top_k == 0:
        top_k = None
    if top_p == 0:
        top_p = None
    if top_p and top_k:
        top_k = None
    if not temperature:
        temperature = 1.0

    prompt2 = prompt
    output = ''

    # Render the prompt dialog immediately instead of waiting
    # for the generator to return its first result
    yield [state, prompt2, output]
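
    # max_new_tokens=1 keeps the session open and returns one token per
    # iteration, so the output streams and the Stop button stays responsive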
    try:
        for out in client.generate(prompt,
                                   max_new_tokens=1,
                                   do_sample=do_sample,
                                   temperature=temperature,
                                   top_k=top_k,
                                   top_p=top_p,
                                   extra_stop_sequences=seq
                                   ):
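            # The Stop button flips this flag; close the remote session and bail out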
            if not state['generate']:
                client.close_session()
                return

            output += out
            if copy_output:
                prompt2 += out

            yield state, prompt2, output
    except Exception:
        print(traceback.format_exc())
        yield state, prompt, output + "\nError: " + traceback.format_exc()
        return


def stop(state):
    """Stops generating."""
    state.update({"generate": False})
    return state


with gr.Blocks() as iface_prompt:
    gr.Markdown("""**Useful for testing raw prompts with zero, one or few-shot prompting.**""")

    with gr.Row():
        model = gr.Radio(["bloom", "bloomz", "bloom-7b1"], value='bloom', label="Use model")

        # Additional ending sequences at which generation should stop
        endseq = gr.CheckboxGroup(["\\n", "</s>", "? (question mark)", ". (dot)"],
                                  value=["\\n", "</s>"], label='Extra end sequences')

        # Maximum length of the inference session
        max_length = gr.Radio([64, 128, 256, 512, 1024, 2048], value=512, interactive=True, label="Max length")

    with gr.Row():
        with gr.Column():
            # Switch between sampling and greedy generation
            do_sample = gr.Checkbox(value=True, interactive=True, label="do_sample")

            # Should the app append the stop sequence to the prompt,
            # or leave the prompt open-ended?
            add_stoptoken = gr.Checkbox(value=True, interactive=True, label="Automatically add eos token to the prompt.")

            # Only one of top_k and top_p can be set. Requires do_sample=True to take effect.
            top_k = gr.Number(value=0, precision=0, interactive=True, label="top_k")
            top_p = gr.Number(value=0.9, precision=2, interactive=True, label="top_p")

            # TODO num_beams

            # Generation temperature
            temperature = gr.Number(value=0.75, precision=2, interactive=True, label="Temperature")

    prompt = gr.Textbox(lines=3, label='Prompt', placeholder="Prompt Here...")
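
    # Per-session state: the 'generate' flag lets the Stop button
    # interrupt the streaming loop in generate()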
    state = gr.State({'generate': False})

    with gr.Row():
        button_generate = gr.Button("Generate")
        button_stop = gr.Button("Stop")

        # Automatically copy the output to the end of the prompt
        copy_output = gr.Checkbox(label="Output -> Prompt")

    output = gr.Textbox(lines=3, label='Output')

    inputs = [state, prompt, model, endseq, max_length, do_sample,
              top_k, top_p, temperature, add_stoptoken, copy_output]
    outputs = [state, prompt, output]

    button_generate.click(generate, inputs=inputs, outputs=outputs)
    button_stop.click(stop, inputs=[state], outputs=[state])

    examples = gr.Examples(inputs=[prompt, model, do_sample, top_k, top_p, temperature, add_stoptoken],
                           examples=[
                               ["The SQL command to extract all the users whose name starts with A is: ", "bloom-7b1", False, 0, 0, 1, False],
                               ["The Spanish translation of thank you for your help is: ", "bloom-7b1", False, 0, 0, 1, False],
                               ["A human talks to a powerful AI that follows the Human's instructions.\n"
                                "AI is talkative, friendly, positive and provides detailed answers to any question.</s>\n"
                                "Human: Hi!</s>\n"
                                "AI: Hi! How can I help you?</s>\n"
                                "Human: What's the capital of Portugal?</s>\n"
                                "AI: ", "bloomz", True, 0, 0.9, 0.75, False],
                           ])
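

# A minimal sketch for running this interface standalone; in the full
# playground, app.py presumably mounts iface_prompt as one of several tabs.
if __name__ == '__main__':
    iface_prompt.launch()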