import gradio as gr
import torch
from transformers import pipeline

# Load both Dolly variants that the demo compares. The model card recommends
# torch.bfloat16 to roughly halve the memory footprint; device_map="auto"
# spreads the weights across the available devices.
instruct_pipeline_3b = pipeline(
    model="databricks/dolly-v2-3b",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
instruct_pipeline_7b = pipeline(
    model="databricks/dolly-v2-7b",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)


def generate(query, temperature, top_p, top_k, max_new_tokens):
    # Run the same prompt through both models with identical sampling
    # settings so the two outputs are directly comparable.
    sampling_kwargs = dict(
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
    )
    response_3b = instruct_pipeline_3b(query, **sampling_kwargs)[0]["generated_text"]
    response_7b = instruct_pipeline_7b(query, **sampling_kwargs)[0]["generated_text"]
    return response_3b, response_7b


with gr.Blocks() as demo:
    gr.Markdown(
        """

# Databricks Dolly LLMs

This demo compares the two smaller variants of the Databricks Dolly models: the [2.8B](https://huggingface.co/databricks/dolly-v2-3b) and the [6.9B](https://huggingface.co/databricks/dolly-v2-7b). Both are based on EleutherAI's Pythia models, fine-tuned on approximately [15K instruction demonstrations](https://huggingface.co/datasets/HuggingFaceH4/databricks_dolly_15k).
"""
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question here",
                    label="Question",
                    elem_id="q-input",
                )
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        temperature = gr.Slider(
                            label="Temperature",
                            value=0.5,
                            minimum=0.0,
                            maximum=2.0,
                            step=0.1,
                            interactive=True,
                            info="Higher values produce more diverse outputs",
                        )
                with gr.Column():
                    with gr.Row():
                        top_p = gr.Slider(
                            label="Top-p (nucleus sampling)",
                            value=0.95,
                            minimum=0.0,
                            maximum=1.0,
                            step=0.05,
                            interactive=True,
                            info="Higher values sample fewer low-probability tokens",
                        )
                with gr.Column():
                    with gr.Row():
                        top_k = gr.Slider(
                            label="Top-k",
                            value=50,
                            minimum=0,
                            maximum=100,
                            step=1,
                            interactive=True,
                            info="Sample from a shortlist of the top-k tokens",
                        )
                with gr.Column():
                    with gr.Row():
                        max_new_tokens = gr.Slider(
                            label="Maximum new tokens",
                            value=256,
                            minimum=0,
                            maximum=2048,
                            step=5,
                            interactive=True,
                            info="The maximum number of new tokens to generate",
                        )
    with gr.Row():
        submit = gr.Button("Generate Answers")
    with gr.Row():
        with gr.Column():
            with gr.Box():
                gr.Markdown("**Dolly 3B**")
                output_3b = gr.Markdown()
        with gr.Column():
            with gr.Box():
                gr.Markdown("**Dolly 7B**")
                output_7b = gr.Markdown()
        # with gr.Column():
        #     with gr.Box():
        #         gr.Markdown("**Dolly 12B**")
        #         output_12b = gr.Markdown()

    # Wire generation to both the button click and pressing Enter in the textbox.
    submit.click(
        generate,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens],
        outputs=[output_3b, output_7b],
    )
    instruction.submit(
        generate,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens],
        outputs=[output_3b, output_7b],
    )

demo.launch()
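# A minimal smoke test, left commented out because demo.launch() blocks until
# the server stops: the Dolly pipelines can be queried directly and return
# records with a "generated_text" field (per the dolly-v2 model card). The
# prompt below is illustrative; any instruction works.
#
# res = instruct_pipeline_3b(
#     "Explain the difference between nuclear fission and fusion.",
#     do_sample=True, temperature=0.5, top_p=0.95, top_k=50, max_new_tokens=128,
# )
# print(res[0]["generated_text"])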