Update app.py
app.py CHANGED
@@ -28,15 +28,28 @@ def generate_response(prompt):
         f"Question: {prompt}\nReasoning:"
     )
     inputs = tokenizer(reasoning_prompt, return_tensors="pt").to(model.device)
+
+    # Streamed response
+    stream = model.generate(
+        **inputs,
+        max_new_tokens=300,  # Increased token limit
+        do_sample=True,
+        temperature=0.8,
+        top_p=0.95,
+        stream=True
+    )
+
+    # Yield output tokens in real-time
+    for chunk in stream:
+        yield tokenizer.decode(chunk[0], skip_special_tokens=True)
 
+demo = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
     outputs=gr.Textbox(label="Response"),
     title="LoRA Model Reasoning Inference",
-    description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio."
+    description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio.",
+    live=True
 )
 
+demo.launch()
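Note on the streaming loop: `stream=True` is not a documented argument of `transformers`' `model.generate()`, so the added loop may not stream as intended. A minimal sketch of the same generator written with `TextIteratorStreamer` is shown below; it assumes `model` and `tokenizer` are the LoRA model and tokenizer already loaded earlier in app.py, keeps the same sampling settings, and simplifies the prompt construction.

# Sketch only: generate() has no documented `stream=True` flag, so this uses
# TextIteratorStreamer instead. `model` and `tokenizer` are assumed to be the
# LoRA model and tokenizer loaded earlier in app.py.
from threading import Thread
from transformers import TextIteratorStreamer

def generate_response(prompt):
    reasoning_prompt = f"Question: {prompt}\nReasoning:"  # simplified; app.py builds a longer prompt
    inputs = tokenizer(reasoning_prompt, return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.8,
        top_p=0.95,
        streamer=streamer,
    )

    # generate() blocks until finished, so run it in a background thread
    # and read decoded text off the streamer as it arrives.
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # Gradio shows the latest yielded value in the output Textbox

Yielding the accumulated text rather than each chunk keeps the output Textbox showing the full response as it grows, since Gradio replaces the displayed output with every yielded value.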