Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import pipeline | |
import torch | |
# Build the text-generation pipeline once, at import time; generate() reuses
# this single instance for every request.
# NOTE(review): the name reads like a truncated "instruct_pipeline_3b" and the
# "_3b" suffix may not describe this checkpoint -- kept as-is because
# generate() refers to it by this exact name.
nstruct_pipeline_3b = pipeline(
    model="fnlp/moss-moon-003-sft-int8",
    torch_dtype=torch.float,
    # The checkpoint ships its own modelling code, which this flag permits
    # transformers to download and execute.
    trust_remote_code=True,
    device_map="auto",
)
def _first_generated_text(result):
    """Return the ``generated_text`` of the first candidate in ``result``.

    Accepts the usual ``[{"generated_text": ...}]`` list as well as a bare
    dict or string, and yields ``""`` when nothing usable is present.
    """
    first = result[0] if isinstance(result, (list, tuple)) and result else result
    if isinstance(first, dict):
        first = first.get("generated_text", "")
    return "" if first is None else str(first)


def generate(query, temperature, top_p, top_k, max_new_tokens, *, pipe=None):
    """Generate an answer for ``query`` and return one value per output panel.

    Args:
        query: Prompt text typed by the user.
        temperature: Sampling temperature. A value <= 0 switches to greedy
            decoding, because sampling needs a strictly positive temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Size of the top-k shortlist; coerced to ``int`` because slider
            widgets may deliver floats.
        max_new_tokens: Upper bound on generated tokens; coerced to ``int``
            and clamped to at least 1.
        pipe: Optional callable used instead of the module-level
            ``nstruct_pipeline_3b`` (same call convention:
            ``pipe(prompt, **generation_kwargs)``).

    Returns:
        A ``(first_panel, second_panel)`` tuple of strings. The event
        handlers bind this function to two Markdown outputs, so exactly two
        values are returned; only one model is loaded, so the second entry
        is an empty string.
    """
    if pipe is None:
        pipe = nstruct_pipeline_3b

    # Nothing to answer: skip the (expensive) model call entirely.
    if not query or not query.strip():
        return "", ""

    generation_kwargs = {"max_new_tokens": max(1, int(max_new_tokens))}
    if temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            top_k=int(top_k),
        )
    else:
        # The temperature slider starts at 0.0; treat that as "no sampling".
        generation_kwargs["do_sample"] = False

    # Generation settings must be passed by keyword: the pipeline call takes
    # the prompt as its only positional input.
    answer = _first_generated_text(pipe(query, **generation_kwargs))
    return answer, ""
# Gradio front end: a question box, four generation sliders, a submit button
# and two Markdown answer panels, all wired to generate().
# NOTE(review): the heading, description and panel labels talk about Dolly,
# while the pipeline loaded earlier in this file is
# "fnlp/moss-moon-003-sft-int8" -- confirm the intended wording.
with gr.Blocks() as demo:
    gr.Markdown(
        """<h1><center>Databricks Dolly LLMs</center></h1>
        This demo compares the smaller two variants of the Databricks Dolly models, the [2.8B](https://huggingface.co/databricks/dolly-v2-3b), and the [6.9B](https://huggingface.co/databricks/dolly-v2-7b). They are all based on the EluetherAI's Pythia models fine-tuned with approx [15K instruction demonstrations](https://huggingface.co/datasets/HuggingFaceH4/databricks_dolly_15k)
        """
    )

    # --- Prompt and generation controls ---------------------------------
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question here",
                    label="Question",
                    elem_id="q-input",
                )

            # One column per slider so the four controls sit side by side.
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        temperature = gr.Slider(
                            label="Temperature",
                            value=0.5,
                            minimum=0.0,
                            maximum=2.0,
                            step=0.1,
                            interactive=True,
                            info="Higher values produce more diverse outputs",
                        )
                with gr.Column():
                    with gr.Row():
                        top_p = gr.Slider(
                            label="Top-p (nucleus sampling)",
                            value=0.95,
                            minimum=0.0,
                            maximum=1,
                            step=0.05,
                            interactive=True,
                            info="Higher values sample fewer low-probability tokens",
                        )
                with gr.Column():
                    with gr.Row():
                        top_k = gr.Slider(
                            label="Top-k",
                            value=50,
                            minimum=0.0,
                            maximum=100,
                            step=1,
                            interactive=True,
                            info="Sample from a shortlist of top-k tokens",
                        )
                with gr.Column():
                    with gr.Row():
                        max_new_tokens = gr.Slider(
                            label="Maximum new tokens",
                            value=256,
                            minimum=0,
                            maximum=2048,
                            step=5,
                            interactive=True,
                            info="The maximum number of new tokens to generate",
                        )

    with gr.Row():
        submit = gr.Button("Generate Answers")

    # --- Answer panels ---------------------------------------------------
    with gr.Row():
        with gr.Column():
            with gr.Box():
                gr.Markdown("**Dolly 3B**")
                output_3b = gr.Markdown()
        with gr.Column():
            with gr.Box():
                gr.Markdown("**Dolly 7B**")
                output_7b = gr.Markdown()

    # --- Event wiring ----------------------------------------------------
    # Clicking the button and pressing Enter in the question box run the
    # same handler with the same inputs and outputs.
    generation_inputs = [instruction, temperature, top_p, top_k, max_new_tokens]
    answer_panels = [output_3b, output_7b]
    for trigger in (submit.click, instruction.submit):
        trigger(generate, inputs=generation_inputs, outputs=answer_panels)

demo.launch()