Update README.md
README.md CHANGED
@@ -101,7 +101,7 @@ Baselines were evaluated using their respective chat-template and system prompts
 
 #### Zero-shot MGSM
 
-[SeaLLM-7B-v2](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2) also outperforms GPT-3.5 and Qwen-14B on the multilingual MGSM for
+[SeaLLM-7B-v2.5](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2.5) also outperforms GPT-3.5 and Qwen-14B on the multilingual MGSM for Thai.
 
 | Model | MGSM-Zh | MGSM-Th
 |-----| ----- | ---
@@ -126,27 +126,6 @@ We evaluate models on 3 benchmarks following the recommended default setups: 5-s
 | SeaLLM-7B-v2.5 | Multi | 64.05 | 76.87 | 62.54 | 63.11 | 53.30 | 48.64 | 46.86
 
 
-### MT-Bench
-
-**SeaLLM-7B-v2.5 only score 7.40 on MT-bench, better preference tuning is needed**
-On the English [MT-bench](https://arxiv.org/abs/2306.05685) metric, SeaLLM-7B-v2 achieves **7.54** score on the MT-bench (3rd place on the leaderboard for 7B category), outperforms many 70B models and is arguably the only one that handles 10 SEA languages.
-
-Refer to [mt_bench/seallm_7b_v2.jsonl](https://huggingface.co/SeaLLMs/SeaLLM-7B-v2/blob/main/evaluation/mt_bench/seallm_7b_v2.jsonl) for the MT-bench predictions of SeaLLM-7B-v2, and [here](https://github.com/lm-sys/FastChat/issues/3013#issue-2118685341) to reproduce it.
-
-| Model | Access | Langs | MT-Bench
-| --- | --- | --- | --- |
-| GPT-4-turbo | closed | multi | 9.32
-| GPT-4-0613 | closed | multi | 9.18
-| Mixtral-8x7b (46B) | open | multi | 8.3
-| Starling-LM-7B-alpha | open | mono (en) | 8.0
-| OpenChat-3.5-7B | open | mono (en) | 7.81
-| **SeaLLM-7B-v2** | **open** | **multi (10+)** | **7.54**
-| **SeaLLM-7B-v2.5** | **open** | **multi (10+)** | **7.40**
-| [Qwen-14B](https://huggingface.co/Qwen/Qwen-14B-Chat) | open | multi | 6.96
-| [Llama-2-70B](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) | open | mono (en) | 6.86
-| Mistral-7B-instuct | open | mono (en) | 6.84
-
-
 ### Sea-Bench
 
 Not ready
@@ -165,7 +144,6 @@ Hello world<eos>
 <|im_start|>assistant
 Hi there, how can I help?<eos>"""
 
-# NOTE: previous commit has \n between </s> and <|im_start|>, that was incorrect!
 # <|im_start|> is not a special token.
 # Transformers chat_template should be consistent with vLLM format below.
 
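
The two comments kept above can be checked directly: `<|im_start|>` is ordinary text to the tokenizer, while `<eos>` ends each turn. A minimal sketch, assuming the `SeaLLMs/SeaLLM-7B-v2.5` tokenizer keeps the Gemma convention of `<eos>` as its eos special token:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM-7B-v2.5")

# "<|im_start|>" has no special-token entry, so it splits into several sub-word pieces.
print(tokenizer.tokenize("<|im_start|>user\n"))

# "<eos>" is a genuine special token; it should map to the tokenizer's eos id.
print(tokenizer.convert_tokens_to_ids("<eos>"), tokenizer.eos_token_id)
```
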
@@ -176,6 +154,9 @@ print(tokenizer.convert_ids_to_tokens(tokenizer.encode(prompt)))
 ```
 
 #### Using transformers's chat_template
+
+Install the latest transformers (>4.40)
+
 ```python
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
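
A quick way to confirm the environment matches the version note added above; the `4.40.0` floor is taken from that note and is an assumption, not a documented minimum:

```python
# Minimal version check before loading the model; adjust the floor if needed.
from importlib.metadata import version
from packaging.version import Version

if Version(version("transformers")) < Version("4.40.0"):
    raise RuntimeError("Please upgrade: pip install -U 'transformers>=4.40'")
```
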
@@ -183,8 +164,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cuda" # the device to load the model onto
 
 # use bfloat16 to ensure the best performance.
-model = AutoModelForCausalLM.from_pretrained("SeaLLMs/SeaLLM-7B-v2", torch_dtype=torch.bfloat16, device_map=device)
-tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM-7B-v2")
+model = AutoModelForCausalLM.from_pretrained("SeaLLMs/SeaLLM-7B-v2.5", torch_dtype=torch.bfloat16, device_map=device)
+tokenizer = AutoTokenizer.from_pretrained("SeaLLMs/SeaLLM-7B-v2.5")
 
 messages = [
     {"role": "system", "content": "You are a helpful assistant."},
@@ -195,7 +176,6 @@ messages = [
 
 encodeds = tokenizer.apply_chat_template(messages, return_tensors="pt", add_generation_prompt=True)
 print(tokenizer.convert_ids_to_tokens(encodeds[0]))
-# ['<s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'system', '<0x0A>', 'You', '▁are', '▁a', '▁helpful', '▁assistant', '.', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'user', '<0x0A>', 'Hello', '▁world', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'ass', 'istant', '<0x0A>', 'Hi', '▁there', ',', '▁how', '▁can', '▁I', '▁help', '▁you', '▁today', '?', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'user', '<0x0A>', 'Ex', 'plain', '▁general', '▁rel', 'ativity', '▁in', '▁details', '.', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'ass', 'istant', '<0x0A>']
 
 model_inputs = encodeds.to(device)
 model.to(device)
@@ -210,11 +190,9 @@ print(decoded[0])
 
 ```python
 from vllm import LLM, SamplingParams
-TURN_TEMPLATE = "<|im_start|>{role}\n{content}</s>"
+TURN_TEMPLATE = "<|im_start|>{role}\n{content}<eos>\n"
 TURN_PREFIX = "<|im_start|>{role}\n"
 
-# There is no \n between </s> and <|im_start|>.
-
 def seallm_chat_convo_format(conversations, add_assistant_prefix: bool, system_prompt=None):
     # conversations: list of dict with key `role` and `content` (openai format)
     if conversations[0]['role'] != 'system' and system_prompt is not None:
@@ -228,8 +206,8 @@ def seallm_chat_convo_format(conversations, add_assistant_prefix: bool, system_p
         text += prompt
     return text
 
-sparams = SamplingParams(temperature=0.1, max_tokens=1024, stop=['</s>', '<|im_start|>'])
-llm = LLM("SeaLLMs/SeaLLM-7B-v2", dtype="bfloat16")
+sparams = SamplingParams(temperature=0.1, max_tokens=1024, stop=['<eos>', '<|im_start|>'])
+llm = LLM("SeaLLMs/SeaLLM-7B-v2.5", dtype="bfloat16")
 
 message = "Explain general relativity in details."
 prompt = seallm_chat_convo_format(message, True)
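
Note that `seallm_chat_convo_format` documents its input as an OpenAI-style list of `{'role', 'content'}` dicts, while the snippet above passes the raw string; a hedged sketch of the call with the message wrapped into that structure, assuming that is the intended usage:

```python
# Wrap the single user message into the OpenAI-style structure the helper expects.
conversations = [{"role": "user", "content": message}]
prompt = seallm_chat_convo_format(conversations, add_assistant_prefix=True)

gen = llm.generate(prompt, sampling_params=sparams)
print(gen[0].outputs[0].text)
```
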
@@ -238,7 +216,7 @@ gen = llm.generate(prompt, sampling_params)
 print(gen[0].outputs[0].text)
 ```
 
-#### Fine-tuning SeaLLM-7B-v2
+#### Fine-tuning SeaLLM-7B-v2.5
 
 Should follow the chat format and accurately mask out source tokens. Here is an example.
 
@@ -250,7 +228,7 @@ conversations = [
     {"role": "user", "content": "Tell me a joke."},
     {"role": "assistant", "content": "Why don't scientists trust atoms? Because they make up everything."},
 ]
-def seallm_7b_v2_tokenize_multi_turns(tokenizer, conversations, add_assistant_prefix=False):
+def seallm_7b_v25_tokenize_multi_turns(tokenizer, conversations, add_assistant_prefix=False):
     """
     Inputs:
     conversations: list of dict following openai format, eg
@@ -271,7 +249,7 @@ def seallm_7b_v2_tokenize_multi_turns(tokenizer, conversations, add_assistant_pr
     labels = sample['input_ids'].clone()
     labels[sample['token_type_ids'] == 0] = -100
     """
-    TURN_TEMPLATE = "<|im_start|>{role}\n{content}</s>"
+    TURN_TEMPLATE = "<|im_start|>{role}\n{content}<eos>\n"
     TURN_PREFIX = "<|im_start|>{role}\n"
     sample = None
     assistant_prefix_len = None
@@ -304,12 +282,9 @@ def seallm_7b_v2_tokenize_multi_turns(tokenizer, conversations, add_assistant_pr
     return sample
 
 # ! testing
-sample = seallm_7b_v2_tokenize_multi_turns(tokenizer, conversations)
+sample = seallm_7b_v25_tokenize_multi_turns(tokenizer, conversations)
 print(tokenizer.convert_ids_to_tokens(sample['input_ids']))
 print(sample['token_type_ids'])
-# ['<s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'system', '<0x0A>', 'You', '▁are', '▁hel', 'ful', '▁assistant', '.', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'user', '<0x0A>', 'Hello', '▁world', '.', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'ass', 'istant', '<0x0A>', 'Hi', '▁there', ',', '▁how', '▁can', '▁I', '▁help', '?', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'user', '<0x0A>', 'Tell', '▁me', '▁a', '▁joke', '.', '</s>', '▁<', '|', 'im', '_', 'start', '|', '>', 'ass', 'istant', '<0x0A>', 'Why', '▁don', "'", 't', '▁scientists', '▁trust', '▁atoms', '?', '▁Because', '▁they', '▁make', '▁up', '▁everything', '.', '</s>']
-# [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-
 
 
 ```
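
Following the masking rule quoted in the function's docstring (source-side tokens get label `-100`), here is a short sketch of turning the tokenized sample into training labels; it assumes `sample['input_ids']` and `sample['token_type_ids']` come back as flat Python lists of ints:

```python
import torch

# Only assistant tokens (token_type_ids == 1) contribute to the loss;
# everything else is masked out with the ignore index -100.
input_ids = torch.tensor(sample["input_ids"])
token_type_ids = torch.tensor(sample["token_type_ids"])
labels = input_ids.clone()
labels[token_type_ids == 0] = -100
```
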
@@ -329,7 +304,7 @@ If you find our project useful, we hope you would kindly star our repo and cite
 
 ```
 @article{damonlpsg2023seallm,
-  author = {Xuan-Phi Nguyen*, Wenxuan Zhang*, Xin Li*, Mahani Aljunied*,
+  author = {Xuan-Phi Nguyen*, Wenxuan Zhang*, Xin Li*, Mahani Aljunied*, Weiwen Xu, Hou Pong Chan,
             Zhiqiang Hu, Chenhui Shen^, Yew Ken Chia^, Xingxuan Li, Jianyu Wang,
             Qingyu Tan, Liying Cheng, Guanzheng Chen, Yue Deng, Sen Yang,
             Chaoqun Liu, Hang Zhang, Lidong Bing},