lihongze8 committed on
Commit
7f087b8
·
verified ·
1 Parent(s): 305a7a1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer
3
+ from model_utils.prm_model import PRM_MODEL
4
+ from model_utils.io_utils import prepare_input, prepare_batch_input_for_model, derive_step_rewards
5
+ import torch
6
+
7
# Initialize the model and tokenizer (same as your existing code).
model_id = "Skywork/Skywork-o1-Open-PRM-Qwen-2.5-1.5B"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# Load the reward model, then move it to the CPU and switch it to eval mode.
model = PRM_MODEL.from_pretrained(model_id)
model = model.to("cpu").eval()
11
+
12
def evaluate(problem, response):
    """Score a response to a problem with the PRM model, step by step.

    Args:
        problem: Problem statement text.
        response: Candidate solution text. It is handed to ``prepare_input``
            with a newline as the step token.

    Returns:
        The step rewards of the single submitted sample as a plain list, so
        the Gradio JSON output component can serialize it.
    """
    # prepare_input processes one sample; presumably it returns an
    # (input_ids, steps, reward_flags) triple -- verify against
    # model_utils.io_utils.
    sample = prepare_input(problem, response, tokenizer=tokenizer, step_token="\n")

    # Transpose the single sample into per-field batches of size one.
    # The previous `a, b, c = [sample]` tried to unpack a one-element list
    # into three names and raised ValueError on every call.
    input_ids, _steps, reward_flags = zip(*[sample])

    input_ids, attention_mask, reward_flags = prepare_batch_input_for_model(
        input_ids, reward_flags, tokenizer.pad_token_id
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        _, _, rewards = model(
            input_ids=input_ids.to("cpu"),
            attention_mask=attention_mask.to("cpu"),
            return_probs=True,
        )

    step_rewards = derive_step_rewards(rewards, reward_flags)
    first_sample = step_rewards[0]
    # NOTE(review): derive_step_rewards is not visible here. Accept either a
    # tensor/array (has .tolist()) or an already-plain sequence of floats so
    # this does not fail with AttributeError on a Python list.
    if hasattr(first_sample, "tolist"):
        return first_sample.tolist()
    return list(first_sample)
25
+
26
# Build the Gradio interface: two text inputs in, step rewards out as JSON.
iface = gr.Interface(
    fn=evaluate,
    inputs=[gr.Textbox(label="Problem"), gr.Textbox(label="Response")],
    outputs=gr.JSON(label="Step Rewards"),
    title="Problem Response Evaluation",
    description="Enter a problem and its response to get step-wise rewards",
)

# Launch the interface.
iface.launch()