Update app.py
Browse files
app.py
CHANGED
@@ -7,9 +7,14 @@ import json
|
|
7 |
def setup_environment():
|
8 |
if not os.path.exists("skywork-o1-prm-inference"):
|
9 |
print("Cloning repository...")
|
10 |
-
subprocess.run(["git", "clone", "https://github.com/SkyworkAI/skywork-o1-prm-inference.git"], check=True)
|
|
|
|
|
|
|
|
|
11 |
if repo_path not in sys.path:
|
12 |
-
sys.path.append(repo_path)
|
|
|
13 |
|
14 |
setup_environment()
|
15 |
|
@@ -27,17 +32,21 @@ def evaluate(problem, response):
|
|
27 |
try:
|
28 |
# 处理输入数据
|
29 |
processed_data = prepare_input(problem, response, tokenizer=tokenizer, step_token="\n")
|
30 |
-
input_ids, steps, reward_flags = [processed_data]
|
|
|
|
|
31 |
input_ids, attention_mask, reward_flags = prepare_batch_input_for_model(
|
32 |
input_ids,
|
33 |
reward_flags,
|
34 |
tokenizer.pad_token_id
|
35 |
-
)
|
|
|
|
|
36 |
input_ids = input_ids.to("cpu")
|
37 |
attention_mask = attention_mask.to("cpu")
|
38 |
if isinstance(reward_flags, torch.Tensor):
|
39 |
reward_flags = reward_flags.to("cpu")
|
40 |
-
|
41 |
# 模型推理
|
42 |
with torch.no_grad():
|
43 |
_, _, rewards = model(
|
@@ -45,8 +54,11 @@ def evaluate(problem, response):
|
|
45 |
attention_mask=attention_mask,
|
46 |
return_probs=True
|
47 |
)
|
|
|
48 |
# 计算步骤奖励
|
49 |
-
step_rewards = derive_step_rewards(rewards, reward_flags)
|
|
|
|
|
50 |
return json.dumps(step_rewards[0].tolist())
|
51 |
except Exception as e:
|
52 |
return json.dumps({"error": str(e)})
|
|
|
7 |
def setup_environment():
    """Ensure the skywork-o1-prm-inference repo is present and importable.

    Clones the repository into the working directory on first run, then adds
    its absolute path to ``sys.path`` so its modules (``prepare_input`` etc.)
    can be imported.  Side effects only; returns None.

    Raises:
        subprocess.CalledProcessError: if ``git clone`` fails (check=True).
    """
    # abspath() does not require the path to exist, so compute it once up
    # front instead of duplicating the assignment in both branches (the
    # original if/else assigned the identical value on each side).
    repo_path = os.path.abspath("skywork-o1-prm-inference")

    if not os.path.exists("skywork-o1-prm-inference"):
        print("Cloning repository...")
        # argv-list form (shell=False) avoids shell injection; check=True
        # fails loudly instead of silently continuing with a missing repo.
        subprocess.run(
            ["git", "clone", "https://github.com/SkyworkAI/skywork-o1-prm-inference.git"],
            check=True,
        )

    if repo_path not in sys.path:
        sys.path.append(repo_path)
        print(f"Added {repo_path} to Python path")

setup_environment()
|
20 |
|
|
|
32 |
def evaluate(problem, response):
    """Score each reasoning step of `response` against `problem` with the PRM.

    Returns a JSON string: either a list of per-step reward floats for the
    single input sample, or ``{"error": ...}`` if anything fails.

    NOTE(review): the ``def`` line and any code between it and the ``try``
    were outside the visible diff hunks; signature taken from the hunk
    header ``def evaluate(problem, response):`` — confirm against the full file.
    """
    try:
        # Process the input data into one (input_ids, steps, reward_flags) tuple.
        processed_data = prepare_input(problem, response, tokenizer=tokenizer, step_token="\n")
        # BUG FIX: the original `input_ids, steps, reward_flags = [processed_data]`
        # raises ValueError (cannot unpack a 1-element list into 3 names).
        # zip(*batch) transposes a batch of per-sample tuples into three
        # parallel sequences, which is what the batching helper expects.
        input_ids, steps, reward_flags = zip(*[processed_data])

        # Prepare padded batch input for the model.
        input_ids, attention_mask, reward_flags = prepare_batch_input_for_model(
            input_ids,
            reward_flags,
            tokenizer.pad_token_id
        )

        # Make sure all tensors live on CPU.
        input_ids = input_ids.to("cpu")
        attention_mask = attention_mask.to("cpu")
        if isinstance(reward_flags, torch.Tensor):
            reward_flags = reward_flags.to("cpu")

        # Model inference (no gradients needed for scoring).
        with torch.no_grad():
            _, _, rewards = model(
                # NOTE(review): this kwarg line was hidden between diff hunks;
                # presumed to be input_ids=input_ids — confirm in the full file.
                input_ids=input_ids,
                attention_mask=attention_mask,
                return_probs=True
            )

        # Derive per-step rewards from token-level rewards and flags.
        step_rewards = derive_step_rewards(rewards, reward_flags)

        # Single-sample batch: return the first (only) entry as a JSON list.
        return json.dumps(step_rewards[0].tolist())
    except Exception as e:
        # Surface failures as a JSON error payload instead of crashing the UI.
        return json.dumps({"error": str(e)})
|