WisdomShell
/

RewardAnything-8B-v1

@@ -9,26 +9,19 @@ pipeline_tag: text-generation
 base_model: Qwen/Qwen3-8B
 # RewardAnything-8B-v1 is built on Qwen3-8B (Hub identifier: Qwen/Qwen3-8B).
 ---
-# RewardAnything: Generalizable Principle-Following Reward Models (8B-v1)
 <div align="center">
   <picture>
-    <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/ZhuohaoYu/RewardAnything/main/assets/rewardanything-logo-horizontal-dark-mode.png">
-    <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/ZhuohaoYu/RewardAnything/main/assets/rewardanything-logo-horizontal.png">
-    <img alt="RewardAnything" src="https://raw.githubusercontent.com/ZhuohaoYu/RewardAnything/main/assets/rewardanything-logo-horizontal.png" width="400">
     </picture>
-  <br/>
   <p>
     <a href="https://zhuohaoyu.github.io/RewardAnything"><img alt="Website" src="https://img.shields.io/badge/🌐_Project-Website-A593C2?style=flat-square&labelColor=8A7AA8"></a>
-    <a href="https://huggingface.co/zhuohaoyu/RewardAnything-8B-v1"><img alt="Model Weights" src="https://img.shields.io/badge/🤗_HuggingFace-Model_Weights-D4A574?style=flat-square&labelColor=B8956A"></a>
     <a href="https://arxiv.org/abs/XXXX.XXXXX"><img alt="Paper" src="https://img.shields.io/badge/📄_arXiv-Paper-C7969C?style=flat-square&labelColor=A8798A"></a>
     <a href="https://pypi.org/project/rewardanything/"><img alt="PyPI" src="https://img.shields.io/pypi/v/rewardanything.svg?style=flat-square&color=7B9BB3&labelColor=5A7A94"></a>
     </p>
-  <br/>
-# RewardAnything: Generalizable Principle-Following Reward Models
   <a>Zhuohao Yu<sup>1,§</sup></a>&emsp;
   <a>Jiali Zeng<sup>2</sup></a>&emsp;
   <a>Weizheng Gu<sup>1</sup></a>&emsp;
@@ -40,7 +33,6 @@ base_model: Qwen/Qwen1.5-7B-Chat
   <a>Shikun Zhang<sup>1</sup></a>&emsp;
   <a>Wei Ye<sup>1,†</sup></a>
   <div>
-    <br/>
     <p>
       <sup>1</sup>Peking University&emsp;
       <sup>2</sup>WeChat AI&emsp;
@@ -87,7 +79,7 @@ import rewardanything
 # Load model locally (similar to HuggingFace)
 reward_model = rewardanything.from_pretrained(
-    "zhuohaoyu/RewardAnything-8B-v1",  # Model path/name
     device="cuda",                        # Device placement
     torch_dtype="auto"                   # Automatic dtype selection
 )
@@ -131,7 +123,7 @@ First, install and start a vLLM server. See the [vLLM quickstart guide](https://
 pip install vllm
 # Start vLLM server with RewardAnything model
-vllm serve zhuohaoyu/RewardAnything-8B-v1 \
     --host 0.0.0.0 \
     --port 8000 \
     --max-model-len 8192 \
@@ -145,7 +137,7 @@ Create a config file `config.json`:
 ```json
 {
   "api_key": ["dummy-key-for-vllm"],
-  "api_model": "zhuohaoyu/RewardAnything-8B-v1",
   "api_base": ["http://localhost:8000/v1"],
   "api_timeout": 120.0,
   "generation_config": {
@@ -205,11 +197,11 @@ from rewardanything.processing import prepare_chat_messages, parse_rewardanythin
 # Load model and tokenizer directly
 model = AutoModelForCausalLM.from_pretrained(
-    "zhuohaoyu/RewardAnything-8B-v1",
     torch_dtype="auto",
     device_map="auto"
 )
-tokenizer = AutoTokenizer.from_pretrained("zhuohaoyu/RewardAnything-8B-v1")
 # Prepare evaluation data
 principle = "Judge responses based on helpfulness and accuracy"

 base_model: Qwen/Qwen3-8B
 # RewardAnything-8B-v1 is built on Qwen3-8B (Hub identifier: Qwen/Qwen3-8B).
 ---
 <div align="center">
   <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/zhuohaoyu/RewardAnything/main/assets/rewardanything-logo-horizontal-dark-mode.png">
+    <source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/zhuohaoyu/RewardAnything/main/assets/rewardanything-logo-horizontal.png">
+    <img alt="RewardAnything" src="https://raw.githubusercontent.com/zhuohaoyu/RewardAnything/main/assets/rewardanything-logo-horizontal.png" width="400">
     </picture>
   <p>
     <a href="https://zhuohaoyu.github.io/RewardAnything"><img alt="Website" src="https://img.shields.io/badge/🌐_Project-Website-A593C2?style=flat-square&labelColor=8A7AA8"></a>
+    <a href="https://huggingface.co/WisdomShell/RewardAnything-8B-v1"><img alt="Model Weights" src="https://img.shields.io/badge/🤗_HuggingFace-Model_Weights-D4A574?style=flat-square&labelColor=B8956A"></a>
     <a href="https://arxiv.org/abs/XXXX.XXXXX"><img alt="Paper" src="https://img.shields.io/badge/📄_arXiv-Paper-C7969C?style=flat-square&labelColor=A8798A"></a>
     <a href="https://pypi.org/project/rewardanything/"><img alt="PyPI" src="https://img.shields.io/pypi/v/rewardanything.svg?style=flat-square&color=7B9BB3&labelColor=5A7A94"></a>
     </p>
+  <h1> RewardAnything: Generalizable Principle-Following Reward Models </h1>
   <a>Zhuohao Yu<sup>1,§</sup></a>&emsp;
   <a>Jiali Zeng<sup>2</sup></a>&emsp;
   <a>Weizheng Gu<sup>1</sup></a>&emsp;
   <a>Shikun Zhang<sup>1</sup></a>&emsp;
   <a>Wei Ye<sup>1,†</sup></a>
   <div>
     <p>
       <sup>1</sup>Peking University&emsp;
       <sup>2</sup>WeChat AI&emsp;
 # Load model locally (similar to HuggingFace)
 reward_model = rewardanything.from_pretrained(
+    "WisdomShell/RewardAnything-8B-v1",  # Model path/name
     device="cuda",                        # Device placement
     torch_dtype="auto"                   # Automatic dtype selection
 )
 pip install vllm
 # Start vLLM server with RewardAnything model
+vllm serve WisdomShell/RewardAnything-8B-v1 \
     --host 0.0.0.0 \
     --port 8000 \
     --max-model-len 8192 \
 ```json
 {
   "api_key": ["dummy-key-for-vllm"],
+  "api_model": "WisdomShell/RewardAnything-8B-v1",
   "api_base": ["http://localhost:8000/v1"],
   "api_timeout": 120.0,
   "generation_config": {
 # Load model and tokenizer directly
 model = AutoModelForCausalLM.from_pretrained(
+    "WisdomShell/RewardAnything-8B-v1",
     torch_dtype="auto",
     device_map="auto"
 )
+tokenizer = AutoTokenizer.from_pretrained("WisdomShell/RewardAnything-8B-v1")
 # Prepare evaluation data
 principle = "Judge responses based on helpfulness and accuracy"