saumyamalik committed on
Commit
472c111
·
1 Parent(s): 4a49ee3

Added asterisk for closed models

Browse files
Files changed (2) hide show
  1. leaderboard/md.py +1 -1
  2. leaderboard/utils.py +7 -0
leaderboard/md.py CHANGED
@@ -178,7 +178,7 @@ TOP_TEXT = """# RewardBench: Evaluating Reward Models"""
178
 
179
  CAPTION_V2 = f"""The *new version* of RewardBench that is based on unseen human data and designed to be substantially more difficult!
180
 
181
- [Code](https://github.com/allenai/reward-bench) | [Eval. Dataset v2](https://huggingface.co/datasets/allenai/reward-bench-2) | [Results v2](https://huggingface.co/datasets/allenai/reward-bench-2-results) | [Paper](https://arxiv.org/abs/2506.01937) | Total models: {{}} | Last restart (PST): {current_time}"""
182
 
183
  CAPTION_V1 = f"""The original RewardBench -- the first reward model evaluation.
184
 
 
178
 
179
  CAPTION_V2 = f"""The *new version* of RewardBench that is based on unseen human data and designed to be substantially more difficult!
180
 
181
+ [Code](https://github.com/allenai/reward-bench) | [Eval. Dataset v2](https://huggingface.co/datasets/allenai/reward-bench-2) | [Results v2](https://huggingface.co/datasets/allenai/reward-bench-2-results) | [Paper](https://arxiv.org/abs/2506.01937) | Total models: {{}} | | * Closed models not run on Ai2 infrastructure | Last restart (PST): {current_time}"""
182
 
183
  CAPTION_V1 = f"""The original RewardBench -- the first reward model evaluation.
184
 
leaderboard/utils.py CHANGED
@@ -43,6 +43,13 @@ CONTAMINATED_MODELS_V1 = [
43
  "Ray2333/GRM-Gemma-2B-rewardmodel-ft",
44
  ]
45
 
 
 
 
 
 
 
 
46
 
47
  # From Open LLM Leaderboard
48
  def model_hyperlink(link, model_name):
 
43
  "Ray2333/GRM-Gemma-2B-rewardmodel-ft",
44
  ]
45
 
46
+ UNVERIFIED_MODELS_V2 = [
47
+ "ContextualAI/LMUnit-llama3.1-70b",
48
+ "ContextualAI/LMUnit-qwen2.5-72b",
49
+ ]
50
+
51
+ UNVERIFIED_MODELS = UNVERIFIED_MODELS_V2
52
+
53
 
54
  # From Open LLM Leaderboard
55
  def model_hyperlink(link, model_name):