import gradio as gr
import torch
from transformers import pipeline

# Load both Dolly variants that the demo compares. The model card recommends
# torch.bfloat16 to roughly halve the memory footprint; device_map="auto"
# spreads the weights across the available devices.
instruct_pipeline_3b = pipeline(
    model="databricks/dolly-v2-3b",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
instruct_pipeline_7b = pipeline(
    model="databricks/dolly-v2-7b",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)


def generate(query, temperature, top_p, top_k, max_new_tokens):
    # Run the same prompt through both models with identical sampling
    # settings so the two outputs are directly comparable.
    sampling_kwargs = dict(
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
    )
    response_3b = instruct_pipeline_3b(query, **sampling_kwargs)[0]["generated_text"]
    response_7b = instruct_pipeline_7b(query, **sampling_kwargs)[0]["generated_text"]
    return response_3b, response_7b


with gr.Blocks() as demo:
    gr.Markdown(
        """

# Databricks Dolly LLMs

This demo compares the two smaller variants of the Databricks Dolly models: the [2.8B](https://huggingface.co/databricks/dolly-v2-3b) and the [6.9B](https://huggingface.co/databricks/dolly-v2-7b). Both are based on EleutherAI's Pythia models, fine-tuned on approximately [15K instruction demonstrations](https://huggingface.co/datasets/HuggingFaceH4/databricks_dolly_15k).
"""
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question here",
                    label="Question",
                    elem_id="q-input",
                )
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        temperature = gr.Slider(
                            label="Temperature",
                            value=0.5,
                            minimum=0.0,
                            maximum=2.0,
                            step=0.1,
                            interactive=True,
                            info="Higher values produce more diverse outputs",
                        )
                with gr.Column():
                    with gr.Row():
                        top_p = gr.Slider(
                            label="Top-p (nucleus sampling)",
                            value=0.95,
                            minimum=0.0,
                            maximum=1.0,
                            step=0.05,
                            interactive=True,
                            info="Higher values sample fewer low-probability tokens",
                        )
                with gr.Column():
                    with gr.Row():
                        top_k = gr.Slider(
                            label="Top-k",
                            value=50,
                            minimum=0,
                            maximum=100,
                            step=1,
                            interactive=True,
                            info="Sample from a shortlist of the top-k tokens",
                        )
                with gr.Column():
                    with gr.Row():
                        max_new_tokens = gr.Slider(
                            label="Maximum new tokens",
                            value=256,
                            minimum=0,
                            maximum=2048,
                            step=5,
                            interactive=True,
                            info="The maximum number of new tokens to generate",
                        )
    with gr.Row():
        submit = gr.Button("Generate Answers")
    with gr.Row():
        with gr.Column():
            with gr.Box():
                gr.Markdown("**Dolly 3B**")
                output_3b = gr.Markdown()
        with gr.Column():
            with gr.Box():
                gr.Markdown("**Dolly 7B**")
                output_7b = gr.Markdown()
        # with gr.Column():
        #     with gr.Box():
        #         gr.Markdown("**Dolly 12B**")
        #         output_12b = gr.Markdown()

    # Wire generation to both the button click and pressing Enter in the textbox.
    submit.click(
        generate,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens],
        outputs=[output_3b, output_7b],
    )
    instruction.submit(
        generate,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens],
        outputs=[output_3b, output_7b],
    )

demo.launch()
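# A minimal smoke test, left commented out because demo.launch() blocks until
# the server stops: the Dolly pipelines can be queried directly and return
# records with a "generated_text" field (per the dolly-v2 model card). The
# prompt below is illustrative; any instruction works.
#
# res = instruct_pipeline_3b(
#     "Explain the difference between nuclear fission and fusion.",
#     do_sample=True, temperature=0.5, top_p=0.95, top_k=50, max_new_tokens=128,
# )
# print(res[0]["generated_text"])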