xiaowang7777 commited on
Commit
9e169b0
1 Parent(s): bbb2c84
Files changed (2) hide show
  1. app.py +87 -1
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,3 +1,89 @@
1
  import gradio as gr
 
 
2
 
3
- gr.Interface.load("models/fnlp/moss-moon-003-sft-int8",trust_remote_code=True).launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
+ import torch
4
 
5
# Load the MOSS chat model as a Hugging Face text-generation pipeline.
# trust_remote_code=True executes model code shipped in the hub repo;
# device_map="auto" lets accelerate place the weights.
# NOTE(review): torch_dtype=torch.float alongside an int8 checkpoint
# ("moss-moon-003-sft-int8") looks inconsistent — confirm intended dtype.
# NOTE(review): the "nstruct_" prefix is likely a typo for "instruct_".
nstruct_pipeline_3b = pipeline(model="fnlp/moss-moon-003-sft-int8", torch_dtype=torch.float, trust_remote_code=True,
                               device_map="auto")
7
+
8
def generate(query, temperature, top_p, top_k, max_new_tokens):
    """Run the chat pipeline on *query* with the given sampling settings.

    Parameters mirror the UI sliders: temperature, top_p, top_k and
    max_new_tokens are forwarded to the text-generation pipeline.
    Returns the pipeline's output (the generated text structure).
    """
    # BUG FIX: the pipeline's __call__ accepts only the prompt
    # positionally; sampling settings must be keyword arguments,
    # otherwise they are misinterpreted (or raise). do_sample=True is
    # required for temperature/top_p/top_k to take effect.
    return nstruct_pipeline_3b(
        query,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
    )
11
+
12
# Build the demo UI: one question box, sampling-parameter sliders, and a
# single output panel for the loaded MOSS model.
with gr.Blocks() as demo:
    gr.Markdown(
        """<h1><center>MOSS moon-003-sft (int8)</center></h1>

    This demo runs [fnlp/moss-moon-003-sft-int8](https://huggingface.co/fnlp/moss-moon-003-sft-int8), an int8-quantized, instruction-tuned MOSS model.
    """
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(placeholder="Enter your question here", label="Question", elem_id="q-input")
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        temperature = gr.Slider(
                            label="Temperature",
                            value=0.5,
                            minimum=0.0,
                            maximum=2.0,
                            step=0.1,
                            interactive=True,
                            info="Higher values produce more diverse outputs",
                        )
                with gr.Column():
                    with gr.Row():
                        top_p = gr.Slider(
                            label="Top-p (nucleus sampling)",
                            value=0.95,
                            minimum=0.0,
                            maximum=1,
                            step=0.05,
                            interactive=True,
                            info="Higher values sample fewer low-probability tokens",
                        )
                with gr.Column():
                    with gr.Row():
                        top_k = gr.Slider(
                            label="Top-k",
                            value=50,
                            minimum=0.0,
                            maximum=100,
                            step=1,
                            interactive=True,
                            info="Sample from a shortlist of top-k tokens",
                        )
                with gr.Column():
                    with gr.Row():
                        max_new_tokens = gr.Slider(
                            label="Maximum new tokens",
                            value=256,
                            minimum=0,
                            maximum=2048,
                            step=5,
                            interactive=True,
                            info="The maximum number of new tokens to generate",
                        )
            with gr.Row():
                submit = gr.Button("Generate Answers")
            with gr.Row():
                with gr.Column():
                    with gr.Box():
                        gr.Markdown("**MOSS moon-003-sft-int8**")
                        output_3b = gr.Markdown()
    # BUG FIX: generate() returns a single value, so the event handlers
    # must target exactly one output component. The original wired two
    # outputs (a leftover from a two-model Dolly comparison demo), which
    # makes gradio fail at event time; the second panel is removed and
    # the headings corrected to the model actually loaded.
    submit.click(generate, inputs=[instruction, temperature, top_p, top_k, max_new_tokens],
                 outputs=[output_3b])
    instruction.submit(generate, inputs=[instruction, temperature, top_p, top_k, max_new_tokens],
                       outputs=[output_3b])

# Start the gradio server (blocking call).
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ accelerate>=0.12.0
2
+ transformers[torch]==4.25.1