ZennyKenny committed on
Commit fab6136 · verified · 1 Parent(s): bb44481

Update app.py

Files changed (1)
  1. app.py (+9 -4)
app.py CHANGED
@@ -1,4 +1,5 @@
 # LoRA Inference Gradio Space Demo
+
 import spaces
 import gradio as gr
 from peft import PeftModel
@@ -22,16 +23,20 @@ tokenizer = AutoTokenizer.from_pretrained("unsloth/qwen2.5-3b-instruct-unsloth-b
 
 @spaces.GPU
 def generate_response(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(**inputs, max_new_tokens=50)
+    reasoning_prompt = (
+        "Answer the following question and explain your reasoning step by step.\n"
+        f"Question: {prompt}\nReasoning:"
+    )
+    inputs = tokenizer(reasoning_prompt, return_tensors="pt").to(model.device)
+    outputs = model.generate(**inputs, max_new_tokens=150)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 zk_qwen = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
     outputs=gr.Textbox(label="Response"),
-    title="LoRA Model Inference",
-    description="Demo your LoRA model with Hugging Face Gradio."
+    title="LoRA Model Reasoning Inference",
+    description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio."
 )
 
 zk_qwen.launch()
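
For context, the second hunk header shows that app.py defines a tokenizer (and a model) above the changed function; the checkpoint name is truncated in the header, and the model-loading lines are not part of this diff. A minimal sketch of what that setup typically looks like for a LoRA adapter served from a Space, assuming the truncated checkpoint is unsloth's 4-bit Qwen2.5-3B-Instruct and using a hypothetical adapter ID:

# Minimal sketch of the setup this diff assumes; the checkpoint and
# adapter names below are assumptions, not taken from this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit",  # assumed full name of the truncated checkpoint
    device_map="auto",
)
# Attach the fine-tuned LoRA weights on top of the base model (adapter ID is hypothetical).
model = PeftModel.from_pretrained(base_model, "ZennyKenny/my-lora-adapter")
tokenizer = AutoTokenizer.from_pretrained("unsloth/qwen2.5-3b-instruct-unsloth-bnb-4bit")

With this commit, a call such as generate_response("What is 17 * 24?") wraps the question in the new template ("Answer the following question and explain your reasoning step by step. ... Reasoning:") before generation, and max_new_tokens raised from 50 to 150 leaves room for the model to produce the step-by-step reasoning rather than just a short answer.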