Update app.py
app.py CHANGED
@@ -1,4 +1,5 @@
 # LoRA Inference Gradio Space Demo
+
 import spaces
 import gradio as gr
 from peft import PeftModel
@@ -22,16 +23,20 @@ tokenizer = AutoTokenizer.from_pretrained("unsloth/qwen2.5-3b-instruct-unsloth-b
 
 @spaces.GPU
 def generate_response(prompt):
-
-
+    reasoning_prompt = (
+        "Answer the following question and explain your reasoning step by step.\n"
+        f"Question: {prompt}\nReasoning:"
+    )
+    inputs = tokenizer(reasoning_prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(**inputs, max_new_tokens=150)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 zk_qwen = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
     outputs=gr.Textbox(label="Response"),
-    title="LoRA Model Inference",
-    description="Demo your LoRA model with Hugging Face Gradio."
+    title="LoRA Model Reasoning Inference",
+    description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio."
 )
 
 zk_qwen.launch()
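
For context, the diff shows only two hunks; lines 5-21 of app.py, where the tokenizer, base model, and LoRA adapter are presumably loaded, are not part of this change. The sketch below is one plausible reconstruction of the complete file after the commit, not the Space's actual code: the base-model ID is an assumed completion of the truncated hunk header, and the adapter repo "your-username/your-lora-adapter" is a hypothetical placeholder.

# LoRA Inference Gradio Space Demo

import spaces
import gradio as gr
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed setup: the loading block is never shown in the diff, so the
# model ID completion and adapter path below are placeholders.
BASE_MODEL = "unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit"  # assumed from truncated header
ADAPTER = "your-username/your-lora-adapter"                  # hypothetical adapter repo

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")
model = PeftModel.from_pretrained(base_model, ADAPTER)  # attach the LoRA adapter

@spaces.GPU
def generate_response(prompt):
    # Wrap the raw prompt in a step-by-step reasoning template before generating.
    reasoning_prompt = (
        "Answer the following question and explain your reasoning step by step.\n"
        f"Question: {prompt}\nReasoning:"
    )
    inputs = tokenizer(reasoning_prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=150)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

zk_qwen = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(label="Response"),
    title="LoRA Model Reasoning Inference",
    description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio.",
)

zk_qwen.launch()

One design note: decoding outputs[0] returns the full sequence, so the response echoes the question and the "Reasoning:" prefix; slicing the generated tokens past inputs["input_ids"].shape[-1] before decoding would return only the model's continuation.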