Daytona-Beach-Ambassador

Paused

Kanan Hooper

feat: clean up the intro text

67c04f8 over 1 year ago

8.73 kB

	from typing import Iterator

	import gradio as gr
	import torch

	from daytona import DAYTONA_smaller

	from model import get_input_token_length, run


	# System prompt pre-filled in the "System prompt" textbox and used when the
	# cached examples are generated. It is the Daytona Beach prompt imported from
	# the `daytona` module; no other value is ever in effect.
	DEFAULT_SYSTEM_PROMPT = DAYTONA_smaller


	# Upper bound of the "Max new tokens" slider; `generate` rejects larger values.
	MAX_MAX_NEW_TOKENS = 2048
	# Initial value of the "Max new tokens" slider.
	DEFAULT_MAX_NEW_TOKENS = 1024
	# Largest input token count that `check_input_token_length` accepts.
	MAX_INPUT_TOKEN_LENGTH = 4000

	# Markdown shown at the top of the page (rendered with gr.Markdown).
	DESCRIPTION = """
	# Daytona Beach Ambassador

	This Space demonstrates a [long prompt](https://huggingface.co/spaces/kananj/Daytona-Beach-Ambassador/blob/main/daytona.py) running on the base model [Llama-2-13b-chat](https://huggingface.co/meta-llama/Llama-2-13b-chat).

	The prompt instructs the AI to be a "helpful, respectful and honest ambassador for Daytona Beach Florida". The prompt then loads in the [Wikipedia page for Daytona Beach, Florida](https://en.wikipedia.org/wiki/Daytona_Beach,_Florida), slightly shortened so it can fit into the token limit.

	It will attempt to answer questions about Daytona Beach, while also encouraging you to visit!
	"""

	# Markdown footer with the licence notice (rendered with gr.Markdown).
	LICENSE = """
	<p/>

	---
	As a derivative work of [Llama-2-13b-chat](https://huggingface.co/meta-llama/Llama-2-13b-chat) by Meta,
	this demo is governed by the original [license](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/spaces/huggingface-projects/llama-2-13b-chat/blob/main/USE_POLICY.md).
	"""

	# When torch reports no CUDA device, extend the page description with a
	# notice that the demo does not work on CPU.
	if not torch.cuda.is_available():
	    DESCRIPTION = DESCRIPTION + '\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>'


	def clear_and_save_textbox(message: str) -> tuple[str, str]:
	    """Return an empty textbox value paired with the submitted message.

	    The first element is the new (blank) textbox content; the second is the
	    message passed through unchanged.
	    """
	    emptied_textbox = ''
	    return emptied_textbox, message


	def display_input(
	    message: str,
	    history: list[tuple[str, str]],
	) -> list[tuple[str, str]]:
	    """Add the user's message to the chat history with an empty reply.

	    The given list is modified in place and that same list is returned.
	    """
	    pending_turn = (message, '')
	    history.append(pending_turn)
	    return history


	def delete_prev_fn(
	    history: list[tuple[str, str]],
	) -> tuple[list[tuple[str, str]], str]:
	    """Remove the last turn from the chat history and hand back its message.

	    Returns the (mutated) history together with the user message of the
	    removed turn. An empty history, or a removed message that is falsy
	    (e.g. ``None``), yields ``''`` as the message.
	    """
	    try:
	        message, _reply = history.pop()
	    except IndexError:
	        # Nothing to remove: the history was already empty.
	        return history, ''
	    return history, message or ''


	def generate(
	    message: str,
	    history_with_input: list[tuple[str, str]],
	    system_prompt: str,
	    max_new_tokens: int,
	    temperature: float,
	    top_p: float,
	    top_k: int,
	) -> Iterator[list[tuple[str, str]]]:
	    """Stream chat histories that end with the model's reply to ``message``.

	    Args:
	        message: The user message to answer.
	        history_with_input: Chat history whose last entry is dropped before
	            calling the model (the event chains append the pending
	            ``(message, '')`` turn via ``display_input`` beforehand).
	        system_prompt: System prompt forwarded to ``run``.
	        max_new_tokens: Generation budget; must not exceed
	            ``MAX_MAX_NEW_TOKENS``.
	        temperature: Forwarded to ``run``.
	        top_p: Forwarded to ``run``.
	        top_k: Forwarded to ``run``.

	    Yields:
	        The prior history plus ``(message, response)`` for every response
	        produced by ``run``. If ``run`` produces nothing, a single history
	        with an empty reply is yielded instead.

	    Raises:
	        ValueError: If ``max_new_tokens`` is larger than
	            ``MAX_MAX_NEW_TOKENS``. Because this is a generator, the error
	            surfaces on the first iteration, not at call time.
	    """
	    if max_new_tokens > MAX_MAX_NEW_TOKENS:
	        raise ValueError(
	            f'max_new_tokens ({max_new_tokens}) exceeds the allowed maximum '
	            f'({MAX_MAX_NEW_TOKENS}).')

	    history = history_with_input[:-1]
	    generator = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k)
	    # Only the next() call is guarded, so a StopIteration can only mean
	    # "run produced no output" and never something raised around the yield.
	    try:
	        first_response = next(generator)
	    except StopIteration:
	        yield history + [(message, '')]
	    else:
	        yield history + [(message, first_response)]
	    for response in generator:
	        yield history + [(message, response)]


	def process_example(message: str) -> tuple[str, list[tuple[str, str]]]:
	    """Run one example prompt to completion and return the final chat state.

	    Args:
	        message: The example prompt.

	    Returns:
	        A pair of ``''`` (new textbox content) and the last history yielded
	        by ``generate`` for an empty prior conversation. If ``generate``
	        yields nothing, the history is an empty list.
	    """
	    # Pre-bind the result so an empty generator cannot leave it undefined.
	    final_history: list[tuple[str, str]] = []
	    # Same sampling settings as the sliders' initial values; only the last
	    # streamed history matters here.
	    for final_history in generate(message, [], DEFAULT_SYSTEM_PROMPT,
	                                  DEFAULT_MAX_NEW_TOKENS, 1, 0.95, 50):
	        pass
	    return '', final_history


	def check_input_token_length(
	    message: str,
	    chat_history: list[tuple[str, str]],
	    system_prompt: str,
	) -> None:
	    """Reject inputs whose token count exceeds ``MAX_INPUT_TOKEN_LENGTH``.

	    The count is whatever ``get_input_token_length`` reports for the message,
	    chat history and system prompt together.

	    Raises:
	        gr.Error: If the reported count is greater than the limit.
	    """
	    input_token_length = get_input_token_length(message, chat_history, system_prompt)
	    within_limit = not input_token_length > MAX_INPUT_TOKEN_LENGTH
	    if within_limit:
	        return
	    raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.')


	# Page layout and event wiring for the chat UI.
	# NOTE(review): the nesting of the layout containers below is reconstructed
	# (textbox/submit row inside the Group, button row beside it) - confirm
	# against the repository copy of this file.
	with gr.Blocks(css='style.css') as demo:
	    gr.Markdown(DESCRIPTION)
	    gr.DuplicateButton(
	        value='Duplicate Space for private use',
	        elem_id='duplicate-button',
	    )

	    # Conversation view with the message box and submit button.
	    with gr.Group():
	        chatbot = gr.Chatbot(label='Chatbot')
	        with gr.Row():
	            textbox = gr.Textbox(
	                container=False,
	                show_label=False,
	                placeholder='Type a message...',
	                scale=10,
	            )
	            submit_button = gr.Button(
	                'Submit',
	                variant='primary',
	                scale=1,
	                min_width=0,
	            )
	    with gr.Row():
	        retry_button = gr.Button('🔄 Retry', variant='secondary')
	        undo_button = gr.Button('↩️ Undo', variant='secondary')
	        clear_button = gr.Button('🗑️ Clear', variant='secondary')

	    # Holds the most recently submitted message between chained steps.
	    saved_input = gr.State()

	    with gr.Accordion(label='Advanced options', open=False):
	        system_prompt = gr.Textbox(
	            label='System prompt',
	            value=DEFAULT_SYSTEM_PROMPT,
	            lines=6,
	        )
	        max_new_tokens = gr.Slider(
	            label='Max new tokens',
	            minimum=1,
	            maximum=MAX_MAX_NEW_TOKENS,
	            step=1,
	            value=DEFAULT_MAX_NEW_TOKENS,
	        )
	        temperature = gr.Slider(
	            label='Temperature',
	            minimum=0.1,
	            maximum=4.0,
	            step=0.1,
	            value=1.0,
	        )
	        top_p = gr.Slider(
	            label='Top-p (nucleus sampling)',
	            minimum=0.05,
	            maximum=1.0,
	            step=0.05,
	            value=0.95,
	        )
	        top_k = gr.Slider(
	            label='Top-k',
	            minimum=1,
	            maximum=1000,
	            step=1,
	            value=50,
	        )

	    example_prompts = [
	        'Hello there! Tell me about Daytona Beach?',
	        'When is the best time to visit Daytona Beach?',
	        'Tell me about the sports located in Daytona Beach.',
	        'How many beaches are in the Daytona Beach area?',
	        "Write a 100-word article on 'Why my family should visit Daytona Beach'",
	    ]
	    gr.Examples(
	        examples=example_prompts,
	        inputs=textbox,
	        outputs=[textbox, chatbot],
	        fn=process_example,
	        cache_examples=True,
	    )

	    gr.Markdown(LICENSE)

	    # Components passed to `generate`, in its positional parameter order.
	    generation_inputs = [
	        saved_input,
	        chatbot,
	        system_prompt,
	        max_new_tokens,
	        temperature,
	        top_p,
	        top_k,
	    ]

	    def _wire_send_pipeline(register_trigger):
	        """Attach the send pipeline to a trigger and return the final event.

	        Steps: stash the message and clear the textbox, show the message in
	        the chat, check the input token length, and - only if that check
	        succeeded - stream the reply from `generate`.
	        """
	        stashed = register_trigger(
	            fn=clear_and_save_textbox,
	            inputs=textbox,
	            outputs=[textbox, saved_input],
	            api_name=False,
	            queue=False,
	        )
	        shown = stashed.then(
	            fn=display_input,
	            inputs=[saved_input, chatbot],
	            outputs=chatbot,
	            api_name=False,
	            queue=False,
	        )
	        checked = shown.then(
	            fn=check_input_token_length,
	            inputs=[saved_input, chatbot, system_prompt],
	            api_name=False,
	            queue=False,
	        )
	        return checked.success(
	            fn=generate,
	            inputs=generation_inputs,
	            outputs=chatbot,
	            api_name=False,
	        )

	    # Pressing Enter in the textbox and clicking Submit behave the same.
	    _wire_send_pipeline(textbox.submit)
	    button_event_preprocess = _wire_send_pipeline(submit_button.click)

	    # Retry: drop the last turn, show its message again, and regenerate.
	    retry_removed = retry_button.click(
	        fn=delete_prev_fn,
	        inputs=chatbot,
	        outputs=[chatbot, saved_input],
	        api_name=False,
	        queue=False,
	    )
	    retry_shown = retry_removed.then(
	        fn=display_input,
	        inputs=[saved_input, chatbot],
	        outputs=chatbot,
	        api_name=False,
	        queue=False,
	    )
	    retry_shown.then(
	        fn=generate,
	        inputs=generation_inputs,
	        outputs=chatbot,
	        api_name=False,
	    )

	    # Undo: drop the last turn and put its message back into the textbox.
	    undo_removed = undo_button.click(
	        fn=delete_prev_fn,
	        inputs=chatbot,
	        outputs=[chatbot, saved_input],
	        api_name=False,
	        queue=False,
	    )
	    undo_removed.then(
	        fn=lambda x: x,
	        inputs=[saved_input],
	        outputs=textbox,
	        api_name=False,
	        queue=False,
	    )

	    # Clear: empty the chat and forget the saved message.
	    clear_button.click(
	        fn=lambda: ([], ''),
	        outputs=[chatbot, saved_input],
	        queue=False,
	        api_name=False,
	    )

	# Enable Gradio's queue with max_size=20, then launch the app.
	demo.queue(max_size=20).launch()