lihongze8 committed on
Commit
611c227
·
verified ·
1 Parent(s): f71f486

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -6
app.py CHANGED
@@ -7,9 +7,14 @@ import json
7
  def setup_environment():
8
  if not os.path.exists("skywork-o1-prm-inference"):
9
  print("Cloning repository...")
10
- subprocess.run(["git", "clone", "https://github.com/SkyworkAI/skywork-o1-prm-inference.git"], check=True)repo_path = os.path.abspath("skywork-o1-prm-inference")
 
 
 
 
11
  if repo_path not in sys.path:
12
- sys.path.append(repo_path)print(f"Added {repo_path} to Python path")
 
13
 
14
  setup_environment()
15
 
@@ -27,17 +32,21 @@ def evaluate(problem, response):
27
  try:
28
  # 处理输入数据
29
  processed_data = prepare_input(problem, response, tokenizer=tokenizer, step_token="\n")
30
- input_ids, steps, reward_flags = [processed_data]# 准备批处理输入
 
 
31
  input_ids, attention_mask, reward_flags = prepare_batch_input_for_model(
32
  input_ids,
33
  reward_flags,
34
  tokenizer.pad_token_id
35
- )# 确保在CPU上
 
 
36
  input_ids = input_ids.to("cpu")
37
  attention_mask = attention_mask.to("cpu")
38
  if isinstance(reward_flags, torch.Tensor):
39
  reward_flags = reward_flags.to("cpu")
40
-
41
  # 模型推理
42
  with torch.no_grad():
43
  _, _, rewards = model(
@@ -45,8 +54,11 @@ def evaluate(problem, response):
45
  attention_mask=attention_mask,
46
  return_probs=True
47
  )
 
48
  # 计算步骤奖励
49
- step_rewards = derive_step_rewards(rewards, reward_flags)# 确保返回的是有效的JSON字符串
 
 
50
  return json.dumps(step_rewards[0].tolist())
51
  except Exception as e:
52
  return json.dumps({"error": str(e)})
 
7
  def setup_environment():
8
  if not os.path.exists("skywork-o1-prm-inference"):
9
  print("Cloning repository...")
10
+ subprocess.run(["git", "clone", "https://github.com/SkyworkAI/skywork-o1-prm-inference.git"], check=True)
11
+ repo_path = os.path.abspath("skywork-o1-prm-inference")
12
+ else:
13
+ repo_path = os.path.abspath("skywork-o1-prm-inference")
14
+
15
  if repo_path not in sys.path:
16
+ sys.path.append(repo_path)
17
+ print(f"Added {repo_path} to Python path")
18
 
19
  setup_environment()
20
 
 
32
  try:
33
  # 处理输入数据
34
  processed_data = prepare_input(problem, response, tokenizer=tokenizer, step_token="\n")
35
+ input_ids, steps, reward_flags = [processed_data]
36
+
37
+ # 准备批处理输入
38
  input_ids, attention_mask, reward_flags = prepare_batch_input_for_model(
39
  input_ids,
40
  reward_flags,
41
  tokenizer.pad_token_id
42
+ )
43
+
44
+ # 确保在CPU上
45
  input_ids = input_ids.to("cpu")
46
  attention_mask = attention_mask.to("cpu")
47
  if isinstance(reward_flags, torch.Tensor):
48
  reward_flags = reward_flags.to("cpu")
49
+
50
  # 模型推理
51
  with torch.no_grad():
52
  _, _, rewards = model(
 
54
  attention_mask=attention_mask,
55
  return_probs=True
56
  )
57
+
58
  # 计算步骤奖励
59
+ step_rewards = derive_step_rewards(rewards, reward_flags)
60
+
61
+ # 确保返回的是有效的JSON字符串
62
  return json.dumps(step_rewards[0].tolist())
63
  except Exception as e:
64
  return json.dumps({"error": str(e)})