Sergidev committed
Commit 7f7ba92 · verified · 1 Parent(s): 6caf75a

Update app.py

Files changed (1)
  1. app.py +73 -88
app.py CHANGED
@@ -1,20 +1,17 @@
 import os
 from threading import Thread
 from typing import Iterator
-
 import gradio as gr
+import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-DESCRIPTION = """\
-# Qwen 0.5B Text Completion
-
-This is a demo of [`Qwen/Qwen2-0.5B-Instruct`](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct), a lightweight language model fine-tuned for instruction following.
-
-This space allows you to input text and have the AI complete it. Simply type your text in the input box, click "Complete", and watch as the AI generates a continuation of your text.
-
-You can adjust various parameters such as temperature and top-p sampling to control the generation process.
-
-Note: You may see a warning about bitsandbytes being compiled without GPU support. This is expected in environments without GPU and does not affect the basic functionality of the demo.
+
+DESCRIPTION = """\
+# Qwen2 0.5B Instruct Text Completion
+
+This is a demo of [`Qwen/Qwen2-0.5B-Instruct`](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct), fine-tuned for instruction following.
+
+Enter your text in the box below and click "Complete" to have the AI generate a completion for your input. The generated text will be appended to your input. You can stop the generation at any time by clicking the "Stop" button.
 """
 
 MAX_MAX_NEW_TOKENS = 2048
@@ -31,6 +28,8 @@ model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.bfloat16,
 )
 model.eval()
+
+@spaces.GPU(duration=90)
 def generate(
     message: str,
     max_new_tokens: int = 1024,
@@ -47,7 +46,7 @@ def generate(
 
     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
-        {"input_ids": input_ids},
+        input_ids=input_ids,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
        do_sample=True,
@@ -60,87 +59,73 @@ def generate(
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
 
-    full_message = message
+    partial_message = message
     for text in streamer:
-        full_message += text
-        yield full_message
+        partial_message += text
+        yield partial_message
 
 with gr.Blocks(css="style.css", fill_height=True) as demo:
-    gr.Markdown(DESCRIPTION)
-    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
-
-    with gr.Row():
-        with gr.Column(scale=4):
-            text_box = gr.Textbox(
-                label="Enter your text",
-                placeholder="Type your message here...",
-                lines=10
-            )
-        with gr.Column(scale=1):
-            max_new_tokens = gr.Slider(
-                label="Max new tokens",
-                minimum=1,
-                maximum=MAX_MAX_NEW_TOKENS,
-                step=1,
-                value=DEFAULT_MAX_NEW_TOKENS,
-            )
-            temperature = gr.Slider(
-                label="Temperature",
-                minimum=0.1,
-                maximum=4.0,
-                step=0.1,
-                value=0.6,
-            )
-            top_p = gr.Slider(
-                label="Top-p (nucleus sampling)",
-                minimum=0.05,
-                maximum=1.0,
-                step=0.05,
-                value=0.9,
-            )
-            top_k = gr.Slider(
-                label="Top-k",
-                minimum=1,
-                maximum=1000,
-                step=1,
-                value=50,
-            )
-            repetition_penalty = gr.Slider(
-                label="Repetition penalty",
-                minimum=1.0,
-                maximum=2.0,
-                step=0.05,
-                value=1.2,
-            )
-
-    with gr.Row():
-        complete_btn = gr.Button("Complete")
-        stop_btn = gr.Button("Stop Generation")
-
-    stop_click = stop_btn.click(fn=None, cancels=[complete_btn.click])
-
-    complete_btn.click(
-        fn=generate,
-        inputs=[
-            text_box,
-            max_new_tokens,
-            temperature,
-            top_p,
-            top_k,
-            repetition_penalty
-        ],
-        outputs=text_box
-    )
+    gr.Markdown(DESCRIPTION)
+    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
+
+    with gr.Row():
+        with gr.Column(scale=4):
+            text_box = gr.Textbox(
+                label="Enter your text",
+                placeholder="Type your message here...",
+                lines=5
+            )
+        with gr.Column(scale=1):
+            complete_button = gr.Button("Complete")
+            stop_button = gr.Button("Stop")
+
+            max_new_tokens = gr.Slider(
+                label="Max new tokens",
+                minimum=1,
+                maximum=MAX_MAX_NEW_TOKENS,
+                step=1,
+                value=DEFAULT_MAX_NEW_TOKENS,
+            )
+            temperature = gr.Slider(
+                label="Temperature",
+                minimum=0.1,
+                maximum=4.0,
+                step=0.1,
+                value=0.6,
+            )
+            top_p = gr.Slider(
+                label="Top-p (nucleus sampling)",
+                minimum=0.05,
+                maximum=1.0,
+                step=0.05,
+                value=0.9,
+            )
+            top_k = gr.Slider(
+                label="Top-k",
+                minimum=1,
+                maximum=1000,
+                step=1,
+                value=50,
+            )
+            repetition_penalty = gr.Slider(
+                label="Repetition penalty",
+                minimum=1.0,
+                maximum=2.0,
+                step=0.05,
+                value=1.2,
+            )
 
-    gr.Examples(
-        examples=[
-            "Hello there! How are you doing?",
-            "Can you explain briefly to me what is the Python programming language?",
-            "Explain the plot of Cinderella in a sentence.",
-            "How many hours does it take a man to eat a Helicopter?",
-            "Write a 100-word article on 'Benefits of Open-Source in AI research'",
-        ],
-        inputs=text_box
-    )
-if __name__ == "__main__":
-    demo.queue(max_size=20).launch()
+    complete_button.click(
+        generate,
+        inputs=[text_box, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
+        outputs=[text_box],
+    )
+    stop_button.click(
+        None,
+        None,
+        None,
+        cancels=[complete_button.click]
+    )
+
+if __name__ == "__main__":
+    demo.queue(max_size=20).launch()
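
For context, the core of this change is the streaming completion pattern: model.generate runs in a background thread while TextIteratorStreamer yields decoded text chunks, so generate() can yield an ever-growing completion string. Below is a minimal standalone sketch of that pattern using the same Qwen/Qwen2-0.5B-Instruct checkpoint; the function name stream_completion and the example prompt are illustrative and are not part of this commit.

# Minimal sketch of the streaming pattern behind the updated generate().
# Assumes torch and transformers are installed; stream_completion is an
# illustrative name, not a function from this commit.
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
model.eval()

def stream_completion(prompt: str, max_new_tokens: int = 64):
    # Tokenize the prompt; skip_prompt=True makes the streamer emit only newly generated text.
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
    )
    # model.generate blocks, so run it in a background thread and consume the streamer here.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    partial = prompt
    for chunk in streamer:
        partial += chunk
        yield partial  # the prompt plus the completion generated so far

if __name__ == "__main__":
    for text in stream_completion("The Python programming language is"):
        print(text)

In the Space itself, Gradio drives this loop: because generate() is a generator, each yielded partial_message streams into text_box as the output, and the Stop button is wired with cancels= to abort the in-flight Complete click event.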