ZennyKenny committed (verified)
Commit f60da4f · Parent: 4e01411

Update app.py
Files changed (1):
  1. app.py (+4, -4)
app.py CHANGED
@@ -5,6 +5,7 @@ import gradio as gr
 from peft import PeftModel
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import torch
+import threading
 
 # Load the base model
 base_model = AutoModelForCausalLM.from_pretrained(
@@ -22,14 +23,13 @@ model = PeftModel.from_pretrained(
 # Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit")
 
-@spaces.GPU
 def generate_response(prompt):
     reasoning_prompt = (
         "Answer the following question and explain your reasoning step by step.\n"
         f"Question: {prompt}\nReasoning:"
     )
     inputs = tokenizer(reasoning_prompt, return_tensors="pt").to(model.device)
-
+
     # Using TextIteratorStreamer for streaming responses
     streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
 
@@ -42,7 +42,7 @@ def generate_response(prompt):
         streamer=streamer
     )
 
-    thread = torch.Thread(target=model.generate, kwargs=generation_kwargs)
+    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
 
     for new_text in streamer:
@@ -54,7 +54,7 @@ demo = gr.Interface(
     outputs=gr.Textbox(label="Response"),
     title="LoRA Model Reasoning Inference",
     description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio.",
-    live=True
+    allow_flagging="never"
 )
 
 demo.launch(share=True)
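
For context, below is a minimal self-contained sketch of the streaming pattern app.py follows after this commit: tokenize a reasoning prompt, feed a TextIteratorStreamer to model.generate on a background threading.Thread, and yield the accumulating text to Gradio. Only the identifiers visible in the diff are taken from the commit; the LoRA adapter path, the input component, max_new_tokens, and device_map are assumptions made to keep the example runnable.

# Sketch of the streaming setup in app.py after this commit (not the exact file).
import threading

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

BASE_MODEL = "unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit"  # tokenizer id shown in the diff
ADAPTER = "your-username/your-lora-adapter"  # placeholder: adapter id is not shown in the diff

# Load the base model, attach the LoRA adapter, and load the tokenizer
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")
model = PeftModel.from_pretrained(base_model, ADAPTER)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)


def generate_response(prompt):
    reasoning_prompt = (
        "Answer the following question and explain your reasoning step by step.\n"
        f"Question: {prompt}\nReasoning:"
    )
    inputs = tokenizer(reasoning_prompt, return_tensors="pt").to(model.device)

    # The streamer yields decoded text as generate() produces tokens
    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=512,  # assumed value; the real kwargs sit outside the shown hunks
        streamer=streamer,
    )

    # generate() blocks, so it runs on a background thread while the main
    # thread consumes the streamer; torch has no Thread class, hence threading.Thread
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # Gradio streams generator output into the Textbox


demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Prompt"),  # assumed input component
    outputs=gr.Textbox(label="Response"),
    title="LoRA Model Reasoning Inference",
    description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio.",
    allow_flagging="never",
)

demo.launch(share=True)

The key points the commit touches are visible here: the generation thread must come from Python's threading module, and the interface no longer uses live=True, since a generator function already streams partial output on each submit.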