Runtime error
Update app.py
app.py CHANGED
@@ -3,7 +3,6 @@ from threading import Thread
 from typing import Iterator
 
 import gradio as gr
-import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 DESCRIPTION = """\
@@ -14,6 +13,8 @@ This is a demo of [`Qwen/Qwen2-0.5B-Instruct`](https://huggingface.co/Qwen/Qwen2
 This space allows you to input text and have the AI complete it. Simply type your text in the input box, click "Complete", and watch as the AI generates a continuation of your text.
 
 You can adjust various parameters such as temperature and top-p sampling to control the generation process.
+
+Note: You may see a warning about bitsandbytes being compiled without GPU support. This is expected in environments without GPU and does not affect the basic functionality of the demo.
 """
 
 MAX_MAX_NEW_TOKENS = 2048
@@ -30,7 +31,6 @@ model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.bfloat16,
 )
 model.eval()
-@spaces.GPU(duration=90)
 def generate(
     message: str,
     max_new_tokens: int = 1024,
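The diff shows only fragments of generate(): the Thread and TextIteratorStreamer imports in the first hunk, the model load above, and the `for text in streamer` loop in the next hunk. For orientation, here is a minimal sketch of the usual Thread + TextIteratorStreamer streaming pattern those fragments imply; the prompt handling, parameter list, and defaults beyond what the diff shows are assumptions, not code from this commit.

# Minimal sketch of the streaming pattern implied by the visible fragments;
# everything not shown in the diff (prompt handling, sampling defaults) is assumed.
from threading import Thread
from typing import Iterator

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "Qwen/Qwen2-0.5B-Instruct"  # model named in DESCRIPTION
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
model.eval()

def generate(
    message: str,
    max_new_tokens: int = 1024,
    temperature: float = 0.7,
    top_p: float = 0.9,
) -> Iterator[str]:
    inputs = tokenizer(message, return_tensors="pt").to(model.device)
    # The streamer yields decoded text pieces as model.generate produces tokens.
    streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )
    # Run generation in a background thread so the streamer can be consumed here.
    Thread(target=model.generate, kwargs=generate_kwargs).start()

    full_message = ""
    for text in streamer:
        full_message += text
        yield full_message

Running model.generate in a background thread lets the function yield partial text as soon as the streamer decodes it, which is what allows the Gradio UI to stream the completion.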
@@ -64,7 +64,8 @@ def generate(
     for text in streamer:
         full_message += text
         yield full_message
-
+
+with gr.Blocks(css="style.css", fill_height=True) as demo:
     gr.Markdown(DESCRIPTION)
     gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
 
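This hunk places the existing gr.Markdown and gr.DuplicateButton calls under a new `with gr.Blocks(css="style.css", fill_height=True) as demo:` context, making them children of the Blocks app. The rest of the UI (lines 71-143) is not part of this diff; purely as a sketch of how such a "Complete" demo is typically wired, with component names that are assumptions rather than code from this file:

# Rough sketch of how the unshown UI might be wired; DESCRIPTION,
# MAX_MAX_NEW_TOKENS, and generate come from the sketch above or the rest of app.py.
import gradio as gr

with gr.Blocks(css="style.css", fill_height=True) as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")

    input_box = gr.Textbox(label="Your text", lines=5)
    output_box = gr.Textbox(label="Completion", lines=10)
    max_new_tokens = gr.Slider(
        label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, value=1024, step=1
    )

    # Because generate() is a generator, Gradio streams each yielded string
    # into the output textbox.
    gr.Button("Complete").click(
        fn=generate,
        inputs=[input_box, max_new_tokens],
        outputs=output_box,
    )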
@@ -143,4 +144,7 @@ def generate(
     )
 
 if __name__ == "__main__":
+
+    demo = gr.Blocks(css="style.css", fill_height=True)
+
     demo.queue(max_size=20).launch()
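Note that the added assignment rebinds `demo` to a fresh, empty gr.Blocks instance immediately before launch, so the app assembled in the `with gr.Blocks(...) as demo:` block above would not be the one served (and if the new line is not indented under the `if`, the file will not parse at all). A minimal sketch of the launch block without the rebinding, assuming the Blocks app defined earlier in the file:

# Minimal sketch, assuming `demo` is the Blocks app built earlier in app.py:
# launch it directly instead of rebinding the name to an empty gr.Blocks().
if __name__ == "__main__":
    demo.queue(max_size=20).launch()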