ZennyKenny committed on
Commit c4c5c31 · verified · 1 Parent(s): fab6136

Update app.py

Files changed (1): app.py (+18 -5)
app.py CHANGED
@@ -28,15 +28,28 @@ def generate_response(prompt):
         f"Question: {prompt}\nReasoning:"
     )
     inputs = tokenizer(reasoning_prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(**inputs, max_new_tokens=150)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    # Streamed response
+    stream = model.generate(
+        **inputs,
+        max_new_tokens=300,  # Increased token limit
+        do_sample=True,
+        temperature=0.8,
+        top_p=0.95,
+        stream=True
+    )
+
+    # Yield output tokens in real-time
+    for chunk in stream:
+        yield tokenizer.decode(chunk[0], skip_special_tokens=True)
 
-zk_qwen = gr.Interface(
+demo = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
     outputs=gr.Textbox(label="Response"),
     title="LoRA Model Reasoning Inference",
-    description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio."
+    description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio.",
+    live=True
 )
 
-zk_qwen.launch()
+demo.launch()
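
Note on the streaming approach in this commit: `transformers`' `generate()` does not take a `stream=True` keyword, and it returns token-ID tensors rather than an iterator, so the `for chunk in stream` loop as committed is unlikely to stream and will most likely raise an unused-kwargs error. The library's supported streaming path is `TextIteratorStreamer`: run `generate()` in a background thread and have the Gradio function yield the accumulated text. The following is a minimal sketch of that pattern, assuming `model` and `tokenizer` are loaded earlier in app.py (not shown in this diff); names like `streamer` and `generation_kwargs` are illustrative, not from the commit.

from threading import Thread

import gradio as gr
from transformers import TextIteratorStreamer

def generate_response(prompt):
    # Same prompt construction as in the committed app.py.
    reasoning_prompt = f"Question: {prompt}\nReasoning:"
    inputs = tokenizer(reasoning_prompt, return_tensors="pt").to(model.device)

    # The streamer yields decoded text chunks as generate() produces tokens.
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = dict(
        **inputs,
        max_new_tokens=300,
        do_sample=True,
        temperature=0.8,
        top_p=0.95,
        streamer=streamer,
    )

    # generate() blocks, so run it in a background thread and consume the
    # streamer in the foreground; each yielded string is a partial response.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial

demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs=gr.Textbox(label="Response"),
    title="LoRA Model Reasoning Inference",
    description="Demo your LoRA model with step-by-step reasoning in Hugging Face Gradio.",
)

demo.launch()

Because Gradio treats a generator function as a streaming endpoint and progressively updates the output Textbox with each yielded string, `live=True` (which re-runs the function on every input change) is not needed for streaming in this sketch.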