Sergidev committed
Commit 7f7ba92 · verified · 1 Parent(s): 6caf75a

Update app.py

Files changed (1)
  1. app.py +73 -88
app.py CHANGED
@@ -1,20 +1,17 @@
 import os
 from threading import Thread
 from typing import Iterator
-
 import gradio as gr
+import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-DESCRIPTION = """\
-# Qwen 0.5B Text Completion
-
-This is a demo of [`Qwen/Qwen2-0.5B-Instruct`](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct), a lightweight language model fine-tuned for instruction following.
-
-This space allows you to input text and have the AI complete it. Simply type your text in the input box, click "Complete", and watch as the AI generates a continuation of your text.
-
-You can adjust various parameters such as temperature and top-p sampling to control the generation process.
-
-Note: You may see a warning about bitsandbytes being compiled without GPU support. This is expected in environments without GPU and does not affect the basic functionality of the demo.
+
+DESCRIPTION = """\
+# Qwen2 0.5B Instruct Text Completion
+
+This is a demo of [`Qwen/Qwen2-0.5B-Instruct`](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct), fine-tuned for instruction following.
+
+Enter your text in the box below and click "Complete" to have the AI generate a completion for your input. The generated text will be appended to your input. You can stop the generation at any time by clicking the "Stop" button.
 """
 
 MAX_MAX_NEW_TOKENS = 2048
@@ -31,6 +28,8 @@ model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.bfloat16,
 )
 model.eval()
+
+@spaces.GPU(duration=90)
 def generate(
     message: str,
     max_new_tokens: int = 1024,
@@ -47,7 +46,7 @@ def generate(
 
     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
     generate_kwargs = dict(
-        {"input_ids": input_ids},
+        input_ids=input_ids,
         streamer=streamer,
         max_new_tokens=max_new_tokens,
        do_sample=True,
@@ -60,87 +59,73 @@ def generate(
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
 
-    full_message = message
+    partial_message = message
     for text in streamer:
-        full_message += text
-        yield full_message
+        partial_message += text
+        yield partial_message
 
 with gr.Blocks(css="style.css", fill_height=True) as demo:
-    gr.Markdown(DESCRIPTION)
-    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
-
-    with gr.Row():
-        with gr.Column(scale=4):
-            text_box = gr.Textbox(
-                label="Enter your text",
-                placeholder="Type your message here...",
-                lines=10
-            )
-        with gr.Column(scale=1):
-            max_new_tokens = gr.Slider(
-                label="Max new tokens",
-                minimum=1,
-                maximum=MAX_MAX_NEW_TOKENS,
-                step=1,
-                value=DEFAULT_MAX_NEW_TOKENS,
-            )
-            temperature = gr.Slider(
-                label="Temperature",
-                minimum=0.1,
-                maximum=4.0,
-                step=0.1,
-                value=0.6,
-            )
-            top_p = gr.Slider(
-                label="Top-p (nucleus sampling)",
-                minimum=0.05,
-                maximum=1.0,
-                step=0.05,
-                value=0.9,
-            )
-            top_k = gr.Slider(
-                label="Top-k",
-                minimum=1,
-                maximum=1000,
-                step=1,
-                value=50,
-            )
-            repetition_penalty = gr.Slider(
-                label="Repetition penalty",
-                minimum=1.0,
-                maximum=2.0,
-                step=0.05,
-                value=1.2,
-            )
-
-    with gr.Row():
-        complete_btn = gr.Button("Complete")
-        stop_btn = gr.Button("Stop Generation")
-
-    stop_click = stop_btn.click(fn=None, cancels=[complete_btn.click])
-
-    complete_btn.click(
-        fn=generate,
-        inputs=[
-            text_box,
-            max_new_tokens,
-            temperature,
-            top_p,
-            top_k,
-            repetition_penalty
-        ],
-        outputs=text_box
-    )
+    gr.Markdown(DESCRIPTION)
+    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
+
+    with gr.Row():
+        with gr.Column(scale=4):
+            text_box = gr.Textbox(
+                label="Enter your text",
+                placeholder="Type your message here...",
+                lines=5
+            )
+        with gr.Column(scale=1):
+            complete_button = gr.Button("Complete")
+            stop_button = gr.Button("Stop")
+
+            max_new_tokens = gr.Slider(
+                label="Max new tokens",
+                minimum=1,
+                maximum=MAX_MAX_NEW_TOKENS,
+                step=1,
+                value=DEFAULT_MAX_NEW_TOKENS,
+            )
+            temperature = gr.Slider(
+                label="Temperature",
+                minimum=0.1,
+                maximum=4.0,
+                step=0.1,
+                value=0.6,
+            )
+            top_p = gr.Slider(
+                label="Top-p (nucleus sampling)",
+                minimum=0.05,
+                maximum=1.0,
+                step=0.05,
+                value=0.9,
+            )
+            top_k = gr.Slider(
+                label="Top-k",
+                minimum=1,
+                maximum=1000,
+                step=1,
+                value=50,
+            )
+            repetition_penalty = gr.Slider(
+                label="Repetition penalty",
+                minimum=1.0,
+                maximum=2.0,
+                step=0.05,
+                value=1.2,
+            )
 
-    gr.Examples(
-        examples=[
-            "Hello there! How are you doing?",
-            "Can you explain briefly to me what is the Python programming language?",
-            "Explain the plot of Cinderella in a sentence.",
-            "How many hours does it take a man to eat a Helicopter?",
-            "Write a 100-word article on 'Benefits of Open-Source in AI research'",
-        ],
-        inputs=text_box
-    )
-if __name__ == "__main__":
-    demo.queue(max_size=20).launch()
+    complete_button.click(
+        generate,
+        inputs=[text_box, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
+        outputs=[text_box],
+    )
+    stop_button.click(
+        None,
+        None,
+        None,
+        cancels=[complete_button.click]
+    )
+
+if __name__ == "__main__":
+    demo.queue(max_size=20).launch()
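
For context, the core of this change is the streaming completion pattern: model.generate runs in a background thread while TextIteratorStreamer yields decoded text chunks, so generate() can yield an ever-growing completion string. Below is a minimal standalone sketch of that pattern using the same Qwen/Qwen2-0.5B-Instruct checkpoint; the function name stream_completion and the example prompt are illustrative and are not part of this commit.

# Minimal sketch of the streaming pattern behind the updated generate().
# Assumes torch and transformers are installed; stream_completion is an
# illustrative name, not a function from this commit.
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "Qwen/Qwen2-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
model.eval()

def stream_completion(prompt: str, max_new_tokens: int = 64):
    # Tokenize the prompt; skip_prompt=True makes the streamer emit only newly generated text.
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
    )
    # model.generate blocks, so run it in a background thread and consume the streamer here.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    partial = prompt
    for chunk in streamer:
        partial += chunk
        yield partial  # the prompt plus the completion generated so far

if __name__ == "__main__":
    for text in stream_completion("The Python programming language is"):
        print(text)

In the Space itself, Gradio drives this loop: because generate() is a generator, each yielded partial_message streams into text_box as the output, and the Stop button is wired with cancels= to abort the in-flight Complete click event.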