Spaces:

xiaowang7777
/

fnlp-moss-moon-003-sft-int8

Runtime error

App Files Files Community

fnlp-moss-moon-003-sft-int8 / app.py

xiaowang7777

fush

9e169b0 over 1 year ago

raw

history blame

3.16 kB

	import gradio as gr
	from transformers import pipeline
	import torch

	# Build the model pipeline once at import time so every request handled by
	# the UI reuses the same loaded model instead of reloading it per call.
	nstruct_pipeline_3b = pipeline(
	    model="fnlp/moss-moon-003-sft-int8",
	    torch_dtype=torch.float,
	    # The model repository ships its own modelling code, which transformers
	    # will only execute when this flag is set.
	    trust_remote_code=True,
	    # Let the library decide where to place the model weights.
	    device_map="auto",
	)


	def generate(query, temperature, top_p, top_k, max_new_tokens):
	    """Generate a reply to *query* with the module-level model pipeline.

	    Args:
	        query: The user's question. A missing or whitespace-only query
	            short-circuits to an empty string without running the model.
	        temperature: Sampling temperature. A value of 0 or below switches to
	            greedy decoding, because sampling needs a strictly positive
	            temperature.
	        top_p: Nucleus-sampling probability mass (used only when sampling).
	        top_k: Size of the top-k shortlist (used only when sampling).
	        max_new_tokens: Upper bound on generated tokens; clamped to at
	            least 1.

	    Returns:
	        The generated text as a string.
	    """
	    # Nothing to answer: skip the (expensive) model call entirely.
	    if not query or not query.strip():
	        return ""

	    # UI sliders may deliver floats; the generation API expects integer
	    # token counts, and a budget of zero new tokens is not usable.
	    generation_kwargs = {"max_new_tokens": max(1, int(max_new_tokens))}
	    if temperature > 0:
	        # The sampling knobs only take effect when sampling is enabled.
	        generation_kwargs.update(
	            do_sample=True,
	            temperature=float(temperature),
	            top_p=float(top_p),
	            top_k=int(top_k),
	        )
	    else:
	        generation_kwargs["do_sample"] = False

	    # Generation settings must be passed by keyword; the pipeline call only
	    # takes the input text positionally.
	    result = nstruct_pipeline_3b(query, **generation_kwargs)
	    if not result:
	        return ""

	    # Text-generation pipelines return a list of dicts; unwrap the first
	    # candidate so the caller receives plain text.
	    first = result[0] if isinstance(result, (list, tuple)) else result
	    if isinstance(first, dict):
	        return str(first.get("generated_text", ""))
	    return str(first)


	# Gradio UI: a question box, four sampling controls, and one answer panel.
	# Only one model is loaded by this script and `generate` returns a single
	# value, so exactly one output component is wired to the event handlers.
	with gr.Blocks() as demo:
	    gr.Markdown(
	        "<h1><center>MOSS (moss-moon-003-sft-int8)</center></h1>\n\n"
	        "This demo generates answers with the "
	        "[fnlp/moss-moon-003-sft-int8]"
	        "(https://huggingface.co/fnlp/moss-moon-003-sft-int8) model. "
	        "Adjust the sampling controls below, then submit a question.\n"
	    )
	    with gr.Row():
	        with gr.Column():
	            with gr.Row():
	                instruction = gr.Textbox(
	                    placeholder="Enter your question here",
	                    label="Question",
	                    elem_id="q-input",
	                )
	            with gr.Row():
	                with gr.Column():
	                    with gr.Row():
	                        temperature = gr.Slider(
	                            label="Temperature",
	                            value=0.5,
	                            minimum=0.0,
	                            maximum=2.0,
	                            step=0.1,
	                            interactive=True,
	                            info="Higher values produce more diverse outputs",
	                        )
	                with gr.Column():
	                    with gr.Row():
	                        top_p = gr.Slider(
	                            label="Top-p (nucleus sampling)",
	                            value=0.95,
	                            minimum=0.0,
	                            maximum=1,
	                            step=0.05,
	                            interactive=True,
	                            info="Higher values sample fewer low-probability tokens",
	                        )
	                with gr.Column():
	                    with gr.Row():
	                        top_k = gr.Slider(
	                            label="Top-k",
	                            value=50,
	                            minimum=0.0,
	                            maximum=100,
	                            step=1,
	                            interactive=True,
	                            info="Sample from a shortlist of top-k tokens",
	                        )
	                with gr.Column():
	                    with gr.Row():
	                        max_new_tokens = gr.Slider(
	                            label="Maximum new tokens",
	                            value=256,
	                            minimum=0,
	                            maximum=2048,
	                            step=5,
	                            interactive=True,
	                            info="The maximum number of new tokens to generate",
	                        )
	    with gr.Row():
	        submit = gr.Button("Generate Answers")
	    with gr.Row():
	        with gr.Column():
	            # gr.Group rather than gr.Box: Box was removed in Gradio 4.
	            with gr.Group():
	                gr.Markdown("MOSS")
	                output = gr.Markdown()

	    # The button click and pressing Enter in the textbox trigger the same
	    # handler with the same inputs, so the wiring is defined once.
	    generation_inputs = [instruction, temperature, top_p, top_k, max_new_tokens]
	    submit.click(generate, inputs=generation_inputs, outputs=[output])
	    instruction.submit(generate, inputs=generation_inputs, outputs=[output])

	demo.launch()