RefalMachine commited on
Commit
c1a6d04
·
verified ·
1 Parent(s): 76f34ed

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +19 -0
  2. llmtf_eval_k0/daru_treewayabstractive.jsonl +0 -0
  3. llmtf_eval_k0/daru_treewayabstractive_params.jsonl +60 -0
  4. llmtf_eval_k0/daru_treewayabstractive_total.jsonl +8 -0
  5. llmtf_eval_k0/daru_treewayextractive.jsonl +3 -0
  6. llmtf_eval_k0/daru_treewayextractive_params.jsonl +55 -0
  7. llmtf_eval_k0/daru_treewayextractive_total.jsonl +7 -0
  8. llmtf_eval_k0/darumeru_MultiQ.jsonl +0 -0
  9. llmtf_eval_k0/darumeru_MultiQ_params.jsonl +60 -0
  10. llmtf_eval_k0/darumeru_MultiQ_total.jsonl +8 -0
  11. llmtf_eval_k0/darumeru_PARus.jsonl +0 -0
  12. llmtf_eval_k0/darumeru_PARus_params.jsonl +60 -0
  13. llmtf_eval_k0/darumeru_PARus_total.jsonl +7 -0
  14. llmtf_eval_k0/darumeru_RCB.jsonl +0 -0
  15. llmtf_eval_k0/darumeru_RCB_params.jsonl +60 -0
  16. llmtf_eval_k0/darumeru_RCB_total.jsonl +8 -0
  17. llmtf_eval_k0/darumeru_RWSD.jsonl +0 -0
  18. llmtf_eval_k0/darumeru_RWSD_params.jsonl +60 -0
  19. llmtf_eval_k0/darumeru_RWSD_total.jsonl +7 -0
  20. llmtf_eval_k0/darumeru_USE.jsonl +0 -0
  21. llmtf_eval_k0/darumeru_USE_params.jsonl +60 -0
  22. llmtf_eval_k0/darumeru_USE_total.jsonl +7 -0
  23. llmtf_eval_k0/darumeru_cp_para_en.jsonl +0 -0
  24. llmtf_eval_k0/darumeru_cp_para_en_params.jsonl +60 -0
  25. llmtf_eval_k0/darumeru_cp_para_en_total.jsonl +9 -0
  26. llmtf_eval_k0/darumeru_cp_para_ru.jsonl +0 -0
  27. llmtf_eval_k0/darumeru_cp_para_ru_params.jsonl +60 -0
  28. llmtf_eval_k0/darumeru_cp_para_ru_total.jsonl +9 -0
  29. llmtf_eval_k0/darumeru_cp_sent_en.jsonl +0 -0
  30. llmtf_eval_k0/darumeru_cp_sent_en_params.jsonl +60 -0
  31. llmtf_eval_k0/darumeru_cp_sent_en_total.jsonl +9 -0
  32. llmtf_eval_k0/darumeru_cp_sent_ru.jsonl +0 -0
  33. llmtf_eval_k0/darumeru_cp_sent_ru_params.jsonl +60 -0
  34. llmtf_eval_k0/darumeru_cp_sent_ru_total.jsonl +9 -0
  35. llmtf_eval_k0/darumeru_ruMMLU.jsonl +3 -0
  36. llmtf_eval_k0/darumeru_ruMMLU_params.jsonl +60 -0
  37. llmtf_eval_k0/darumeru_ruMMLU_total.jsonl +7 -0
  38. llmtf_eval_k0/darumeru_ruOpenBookQA.jsonl +0 -0
  39. llmtf_eval_k0/darumeru_ruOpenBookQA_params.jsonl +60 -0
  40. llmtf_eval_k0/darumeru_ruOpenBookQA_total.jsonl +8 -0
  41. llmtf_eval_k0/darumeru_ruTiE.jsonl +3 -0
  42. llmtf_eval_k0/darumeru_ruTiE_params.jsonl +60 -0
  43. llmtf_eval_k0/darumeru_ruTiE_total.jsonl +7 -0
  44. llmtf_eval_k0/darumeru_ruWorldTree.jsonl +0 -0
  45. llmtf_eval_k0/darumeru_ruWorldTree_params.jsonl +60 -0
  46. llmtf_eval_k0/darumeru_ruWorldTree_total.jsonl +8 -0
  47. llmtf_eval_k0/evaluation_log.txt +273 -0
  48. llmtf_eval_k0/evaluation_results.txt +2 -0
  49. llmtf_eval_k0/nlpcoreteam_enMMLU.jsonl +3 -0
  50. llmtf_eval_k0/nlpcoreteam_enMMLU_params.jsonl +55 -0
.gitattributes CHANGED
@@ -33,3 +33,22 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llmtf_eval_k0/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ llmtf_eval_k0/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
38
+ llmtf_eval_k0/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
39
+ llmtf_eval_k0/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
40
+ llmtf_eval_k0/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
41
+ llmtf_eval_k1/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
42
+ llmtf_eval_k1/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
43
+ llmtf_eval_k1/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
44
+ llmtf_eval_k1/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
45
+ llmtf_eval_k1/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
46
+ llmtf_eval_k5/daru_treewayabstractive.jsonl filter=lfs diff=lfs merge=lfs -text
47
+ llmtf_eval_k5/daru_treewayextractive.jsonl filter=lfs diff=lfs merge=lfs -text
48
+ llmtf_eval_k5/darumeru_MultiQ.jsonl filter=lfs diff=lfs merge=lfs -text
49
+ llmtf_eval_k5/darumeru_USE.jsonl filter=lfs diff=lfs merge=lfs -text
50
+ llmtf_eval_k5/darumeru_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
51
+ llmtf_eval_k5/darumeru_ruTiE.jsonl filter=lfs diff=lfs merge=lfs -text
52
+ llmtf_eval_k5/nlpcoreteam_enMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
53
+ llmtf_eval_k5/nlpcoreteam_ruMMLU.jsonl filter=lfs diff=lfs merge=lfs -text
54
+ llmtf_eval_k5/russiannlp_rucola_custom.jsonl filter=lfs diff=lfs merge=lfs -text
llmtf_eval_k0/daru_treewayabstractive.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/daru_treewayabstractive_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 512,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 500,
58
+ "method": "generate"
59
+ }
60
+ }
llmtf_eval_k0/daru_treewayabstractive_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "daru/treewayabstractive",
3
+ "results": {
4
+ "rouge1": 0.2813082731366044,
5
+ "rouge2": 0.10129515826302848
6
+ },
7
+ "leaderboard_result": 0.19130171569981644
8
+ }
llmtf_eval_k0/daru_treewayextractive.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ca290a5c85ddb32a4da9e675786e739d29e8edcb2723e9650859f391b53ca42
3
+ size 2139832235
llmtf_eval_k0/daru_treewayextractive_params.jsonl ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645
9
+ ],
10
+ "max_length": 32768,
11
+ "max_new_tokens": 1,
12
+ "pad_token_id": 151643,
13
+ "stop_strings": [],
14
+ "temperature": 0.1,
15
+ "top_k": 40,
16
+ "top_p": 0.9,
17
+ "transformers_version": "4.38.2",
18
+ "trust_remote_code": [
19
+ false
20
+ ]
21
+ },
22
+ "conversation_template": {
23
+ "system_prompt": "",
24
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
25
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
26
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
27
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
28
+ "user_role": "user",
29
+ "bot_role": "assistant",
30
+ "system_role": "system",
31
+ "global_prefix": "",
32
+ "suffix": "<|im_start|>assistant\n",
33
+ "add_special_tokens": false,
34
+ "eos_token": "<|im_end|>"
35
+ },
36
+ "load_in_8bit": false,
37
+ "torch_dtype": "auto",
38
+ "use_flash_attention_2": true,
39
+ "device_map": "cuda:0",
40
+ "use_fast_tokenizer": true,
41
+ "leading_space": false,
42
+ "space_token": null,
43
+ "trust_remote_code": [
44
+ false
45
+ ],
46
+ "max_model_len": 32768
47
+ },
48
+ "task_params": {
49
+ "max_len": 4000,
50
+ "few_shot_count": 0,
51
+ "batch_size": 1,
52
+ "max_sample_per_dataset": 500,
53
+ "method": "calculate_logsoftmax"
54
+ }
55
+ }
llmtf_eval_k0/daru_treewayextractive_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "daru/treewayextractive",
3
+ "results": {
4
+ "r-prec": 0.3917012265512266
5
+ },
6
+ "leaderboard_result": 0.3917012265512266
7
+ }
llmtf_eval_k0/darumeru_MultiQ.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_MultiQ_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "generate"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_MultiQ_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/MultiQ",
3
+ "results": {
4
+ "f1": 0.23082049162131107,
5
+ "em": 0.13288718929254303
6
+ },
7
+ "leaderboard_result": 0.18185384045692704
8
+ }
llmtf_eval_k0/darumeru_PARus.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_PARus_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "calculate_tokens_proba"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_PARus_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/PARus",
3
+ "results": {
4
+ "acc": 0.79
5
+ },
6
+ "leaderboard_result": 0.79
7
+ }
llmtf_eval_k0/darumeru_RCB.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_RCB_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "calculate_tokens_proba"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_RCB_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/RCB",
3
+ "results": {
4
+ "acc": 0.5181818181818182,
5
+ "f1_macro": 0.4329109928238468
6
+ },
7
+ "leaderboard_result": 0.4755464055028325
8
+ }
llmtf_eval_k0/darumeru_RWSD.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_RWSD_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "calculate_tokens_proba"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_RWSD_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/RWSD",
3
+ "results": {
4
+ "acc": 0.6029411764705882
5
+ },
6
+ "leaderboard_result": 0.6029411764705882
7
+ }
llmtf_eval_k0/darumeru_USE.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_USE_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "generate"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_USE_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/USE",
3
+ "results": {
4
+ "grade_norm": 0.153921568627451
5
+ },
6
+ "leaderboard_result": 0.153921568627451
7
+ }
llmtf_eval_k0/darumeru_cp_para_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_cp_para_en_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 1024,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "generate"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_cp_para_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_para_en",
3
+ "results": {
4
+ "symbol_per_token": 4.353034074577473,
5
+ "len": 0.9802575114789726,
6
+ "lcs": 0.9058441645642358
7
+ },
8
+ "leaderboard_result": 0.9058441645642358
9
+ }
llmtf_eval_k0/darumeru_cp_para_ru.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_cp_para_ru_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 1024,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "generate"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_cp_para_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_para_ru",
3
+ "results": {
4
+ "symbol_per_token": 2.654953066881009,
5
+ "len": 0.9876165527469303,
6
+ "lcs": 0.8268038045788856
7
+ },
8
+ "leaderboard_result": 0.8268038045788856
9
+ }
llmtf_eval_k0/darumeru_cp_sent_en.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_cp_sent_en_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 128,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "generate"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_cp_sent_en_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_sent_en",
3
+ "results": {
4
+ "symbol_per_token": 4.31579408268585,
5
+ "len": 0.8895181009295375,
6
+ "lcs": 0.8825764242763656
7
+ },
8
+ "leaderboard_result": 0.8895181009295375
9
+ }
llmtf_eval_k0/darumeru_cp_sent_ru.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_cp_sent_ru_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 128,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "generate"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_cp_sent_ru_total.jsonl ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/cp_sent_ru",
3
+ "results": {
4
+ "symbol_per_token": 2.531882961858218,
5
+ "len": 0.9931991516400629,
6
+ "lcs": 0.9651252623970072
7
+ },
8
+ "leaderboard_result": 0.9931991516400629
9
+ }
llmtf_eval_k0/darumeru_ruMMLU.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0efc9923721404aa22a0d7f16a5462d753529496db4794671bf3122a6e665f1e
3
+ size 32250795
llmtf_eval_k0/darumeru_ruMMLU_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "calculate_tokens_proba"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_ruMMLU_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruMMLU",
3
+ "results": {
4
+ "acc": 0.5755761747979646
5
+ },
6
+ "leaderboard_result": 0.5755761747979646
7
+ }
llmtf_eval_k0/darumeru_ruOpenBookQA.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_ruOpenBookQA_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "calculate_tokens_proba"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_ruOpenBookQA_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruOpenBookQA",
3
+ "results": {
4
+ "acc": 0.8178694158075601,
5
+ "f1_macro": 0.8177749093697874
6
+ },
7
+ "leaderboard_result": 0.8178221625886737
8
+ }
llmtf_eval_k0/darumeru_ruTiE.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:218542d4729b8fc7384fae25ede4aade4276134a7c3af8d643ad7fb242f2dfc6
3
+ size 12205558
llmtf_eval_k0/darumeru_ruTiE_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "calculate_tokens_proba"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_ruTiE_total.jsonl ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruTiE",
3
+ "results": {
4
+ "acc": 0.6348837209302326
5
+ },
6
+ "leaderboard_result": 0.6348837209302326
7
+ }
llmtf_eval_k0/darumeru_ruWorldTree.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
llmtf_eval_k0/darumeru_ruWorldTree_params.jsonl ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645,
9
+ 198,
10
+ 271
11
+ ],
12
+ "max_length": 32768,
13
+ "max_new_tokens": 64,
14
+ "pad_token_id": 151643,
15
+ "stop_strings": [
16
+ "\n",
17
+ "\n\n"
18
+ ],
19
+ "temperature": 0.1,
20
+ "top_k": 40,
21
+ "top_p": 0.9,
22
+ "transformers_version": "4.38.2",
23
+ "trust_remote_code": [
24
+ false
25
+ ]
26
+ },
27
+ "conversation_template": {
28
+ "system_prompt": "",
29
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
30
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
31
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
32
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
33
+ "user_role": "user",
34
+ "bot_role": "assistant",
35
+ "system_role": "system",
36
+ "global_prefix": "",
37
+ "suffix": "<|im_start|>assistant\n",
38
+ "add_special_tokens": false,
39
+ "eos_token": "<|im_end|>"
40
+ },
41
+ "load_in_8bit": false,
42
+ "torch_dtype": "auto",
43
+ "use_flash_attention_2": true,
44
+ "device_map": "cuda:0",
45
+ "use_fast_tokenizer": true,
46
+ "leading_space": false,
47
+ "space_token": null,
48
+ "trust_remote_code": [
49
+ false
50
+ ],
51
+ "max_model_len": 32768
52
+ },
53
+ "task_params": {
54
+ "max_len": 4000,
55
+ "few_shot_count": 0,
56
+ "batch_size": 1,
57
+ "max_sample_per_dataset": 10000000000000,
58
+ "method": "calculate_tokens_proba"
59
+ }
60
+ }
llmtf_eval_k0/darumeru_ruWorldTree_total.jsonl ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task_name": "darumeru/ruWorldTree",
3
+ "results": {
4
+ "acc": 0.9047619047619048,
5
+ "f1_macro": 0.9043845651108326
6
+ },
7
+ "leaderboard_result": 0.9045732349363687
8
+ }
llmtf_eval_k0/evaluation_log.txt ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFO: 2024-07-12 13:32:20,716: llmtf.base.evaluator: Starting eval on ['darumeru/multiq', 'darumeru/parus', 'darumeru/rcb', 'darumeru/ruopenbookqa', 'darumeru/rutie', 'darumeru/ruworldtree', 'darumeru/rwsd', 'darumeru/use', 'russiannlp/rucola_custom']
2
+ INFO: 2024-07-12 13:32:20,717: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
3
+ INFO: 2024-07-12 13:32:20,717: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
4
+ INFO: 2024-07-12 13:32:21,381: llmtf.base.evaluator: Starting eval on ['darumeru/rummlu']
5
+ INFO: 2024-07-12 13:32:21,381: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
6
+ INFO: 2024-07-12 13:32:21,381: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
7
+ INFO: 2024-07-12 13:32:23,188: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/rummlu']
8
+ INFO: 2024-07-12 13:32:23,189: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645]
9
+ INFO: 2024-07-12 13:32:23,189: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
10
+ INFO: 2024-07-12 13:32:24,860: llmtf.base.darumeru/MultiQ: Loading Dataset: 4.14s
11
+ INFO: 2024-07-12 13:32:24,879: llmtf.base.evaluator: Starting eval on ['nlpcoreteam/enmmlu']
12
+ INFO: 2024-07-12 13:32:24,879: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645]
13
+ INFO: 2024-07-12 13:32:24,879: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
14
+ INFO: 2024-07-12 13:32:26,606: llmtf.base.evaluator: Starting eval on ['daru/treewayabstractive']
15
+ INFO: 2024-07-12 13:32:26,607: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
16
+ INFO: 2024-07-12 13:32:26,607: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
17
+ INFO: 2024-07-12 13:32:28,649: llmtf.base.evaluator: Starting eval on ['daru/treewayextractive']
18
+ INFO: 2024-07-12 13:32:28,649: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645]
19
+ INFO: 2024-07-12 13:32:28,649: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
20
+ INFO: 2024-07-12 13:32:30,170: llmtf.base.darumeru/ruMMLU: Loading Dataset: 8.79s
21
+ INFO: 2024-07-12 13:32:31,039: llmtf.base.evaluator: Starting eval on ['darumeru/cp_sent_ru', 'darumeru/cp_sent_en', 'darumeru/cp_para_ru', 'darumeru/cp_para_en']
22
+ INFO: 2024-07-12 13:32:31,040: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
23
+ INFO: 2024-07-12 13:32:31,040: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
24
+ INFO: 2024-07-12 13:32:31,528: llmtf.base.daru/treewayabstractive: Loading Dataset: 4.92s
25
+ INFO: 2024-07-12 13:32:33,602: llmtf.base.darumeru/cp_sent_ru: Loading Dataset: 2.56s
26
+ INFO: 2024-07-12 13:32:41,690: llmtf.base.daru/treewayextractive: Loading Dataset: 13.04s
27
+ INFO: 2024-07-12 13:34:36,781: llmtf.base.nlpcoreteam/enMMLU: Loading Dataset: 131.90s
28
+ INFO: 2024-07-12 13:34:38,322: llmtf.base.nlpcoreteam/ruMMLU: Loading Dataset: 135.13s
29
+ INFO: 2024-07-12 13:39:29,078: llmtf.base.darumeru/ruMMLU: Processing Dataset: 418.89s
30
+ INFO: 2024-07-12 13:39:29,081: llmtf.base.darumeru/ruMMLU: Results for darumeru/ruMMLU:
31
+ INFO: 2024-07-12 13:39:29,091: llmtf.base.darumeru/ruMMLU: {'acc': 0.5755761747979646}
32
+ INFO: 2024-07-12 13:39:29,136: llmtf.base.evaluator: Ended eval
33
+ INFO: 2024-07-12 13:39:29,141: llmtf.base.evaluator:
34
+ mean darumeru/ruMMLU
35
+ 0.576 0.576
36
+ INFO: 2024-07-12 13:42:34,523: llmtf.base.nlpcoreteam/enMMLU: Processing Dataset: 477.74s
37
+ INFO: 2024-07-12 13:42:34,527: llmtf.base.nlpcoreteam/enMMLU: Results for nlpcoreteam/enMMLU:
38
+ INFO: 2024-07-12 13:42:34,569: llmtf.base.nlpcoreteam/enMMLU: metric
39
+ subject
40
+ abstract_algebra 0.490000
41
+ anatomy 0.585185
42
+ astronomy 0.743421
43
+ business_ethics 0.750000
44
+ clinical_knowledge 0.762264
45
+ college_biology 0.784722
46
+ college_chemistry 0.460000
47
+ college_computer_science 0.630000
48
+ college_mathematics 0.420000
49
+ college_medicine 0.664740
50
+ college_physics 0.392157
51
+ computer_security 0.690000
52
+ conceptual_physics 0.697872
53
+ econometrics 0.570175
54
+ electrical_engineering 0.689655
55
+ elementary_mathematics 0.611111
56
+ formal_logic 0.507937
57
+ global_facts 0.440000
58
+ high_school_biology 0.809677
59
+ high_school_chemistry 0.615764
60
+ high_school_computer_science 0.780000
61
+ high_school_european_history 0.812121
62
+ high_school_geography 0.848485
63
+ high_school_government_and_politics 0.911917
64
+ high_school_macroeconomics 0.715385
65
+ high_school_mathematics 0.488889
66
+ high_school_microeconomics 0.802521
67
+ high_school_physics 0.456954
68
+ high_school_psychology 0.858716
69
+ high_school_statistics 0.675926
70
+ high_school_us_history 0.828431
71
+ high_school_world_history 0.848101
72
+ human_aging 0.686099
73
+ human_sexuality 0.755725
74
+ international_law 0.801653
75
+ jurisprudence 0.833333
76
+ logical_fallacies 0.815951
77
+ machine_learning 0.455357
78
+ management 0.776699
79
+ marketing 0.893162
80
+ medical_genetics 0.790000
81
+ miscellaneous 0.845466
82
+ moral_disputes 0.739884
83
+ moral_scenarios 0.422346
84
+ nutrition 0.774510
85
+ philosophy 0.742765
86
+ prehistory 0.746914
87
+ professional_accounting 0.570922
88
+ professional_law 0.505215
89
+ professional_medicine 0.687500
90
+ professional_psychology 0.722222
91
+ public_relations 0.727273
92
+ security_studies 0.722449
93
+ sociology 0.870647
94
+ us_foreign_policy 0.870000
95
+ virology 0.542169
96
+ world_religions 0.801170
97
+ INFO: 2024-07-12 13:42:34,577: llmtf.base.nlpcoreteam/enMMLU: metric
98
+ subject
99
+ STEM 0.605084
100
+ humanities 0.723525
101
+ other (business, health, misc.) 0.697765
102
+ social sciences 0.781293
103
+ INFO: 2024-07-12 13:42:34,584: llmtf.base.nlpcoreteam/enMMLU: {'acc': 0.7019166707055704}
104
+ INFO: 2024-07-12 13:42:34,616: llmtf.base.evaluator: Ended eval
105
+ INFO: 2024-07-12 13:42:34,636: llmtf.base.evaluator:
106
+ mean darumeru/ruMMLU nlpcoreteam/enMMLU
107
+ 0.639 0.576 0.702
108
+ INFO: 2024-07-12 13:42:36,791: llmtf.base.darumeru/cp_sent_ru: Processing Dataset: 603.17s
109
+ INFO: 2024-07-12 13:42:36,793: llmtf.base.darumeru/cp_sent_ru: Results for darumeru/cp_sent_ru:
110
+ INFO: 2024-07-12 13:42:36,798: llmtf.base.darumeru/cp_sent_ru: {'symbol_per_token': 2.531882961858218, 'len': 0.9931991516400629, 'lcs': 0.9651252623970072}
111
+ INFO: 2024-07-12 13:42:36,799: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
112
+ INFO: 2024-07-12 13:42:36,799: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
113
+ INFO: 2024-07-12 13:42:38,975: llmtf.base.darumeru/cp_sent_en: Loading Dataset: 2.17s
114
+ INFO: 2024-07-12 13:43:48,104: llmtf.base.nlpcoreteam/ruMMLU: Processing Dataset: 549.78s
115
+ INFO: 2024-07-12 13:43:48,105: llmtf.base.nlpcoreteam/ruMMLU: Results for nlpcoreteam/ruMMLU:
116
+ INFO: 2024-07-12 13:43:48,145: llmtf.base.nlpcoreteam/ruMMLU: metric
117
+ subject
118
+ abstract_algebra 0.430000
119
+ anatomy 0.562963
120
+ astronomy 0.677632
121
+ business_ethics 0.660000
122
+ clinical_knowledge 0.600000
123
+ college_biology 0.597222
124
+ college_chemistry 0.390000
125
+ college_computer_science 0.540000
126
+ college_mathematics 0.470000
127
+ college_medicine 0.554913
128
+ college_physics 0.372549
129
+ computer_security 0.660000
130
+ conceptual_physics 0.574468
131
+ econometrics 0.464912
132
+ electrical_engineering 0.558621
133
+ elementary_mathematics 0.576720
134
+ formal_logic 0.476190
135
+ global_facts 0.420000
136
+ high_school_biology 0.729032
137
+ high_school_chemistry 0.512315
138
+ high_school_computer_science 0.750000
139
+ high_school_european_history 0.745455
140
+ high_school_geography 0.717172
141
+ high_school_government_and_politics 0.663212
142
+ high_school_macroeconomics 0.615385
143
+ high_school_mathematics 0.455556
144
+ high_school_microeconomics 0.642857
145
+ high_school_physics 0.410596
146
+ high_school_psychology 0.759633
147
+ high_school_statistics 0.560185
148
+ high_school_us_history 0.715686
149
+ high_school_world_history 0.767932
150
+ human_aging 0.609865
151
+ human_sexuality 0.633588
152
+ international_law 0.710744
153
+ jurisprudence 0.611111
154
+ logical_fallacies 0.527607
155
+ machine_learning 0.348214
156
+ management 0.718447
157
+ marketing 0.769231
158
+ medical_genetics 0.610000
159
+ miscellaneous 0.688378
160
+ moral_disputes 0.638728
161
+ moral_scenarios 0.244693
162
+ nutrition 0.669935
163
+ philosophy 0.639871
164
+ prehistory 0.608025
165
+ professional_accounting 0.390071
166
+ professional_law 0.410039
167
+ professional_medicine 0.610294
168
+ professional_psychology 0.542484
169
+ public_relations 0.618182
170
+ security_studies 0.653061
171
+ sociology 0.741294
172
+ us_foreign_policy 0.740000
173
+ virology 0.506024
174
+ world_religions 0.719298
175
+ INFO: 2024-07-12 13:43:48,152: llmtf.base.nlpcoreteam/ruMMLU: metric
176
+ subject
177
+ STEM 0.534062
178
+ humanities 0.601183
179
+ other (business, health, misc.) 0.597866
180
+ social sciences 0.649315
181
+ INFO: 2024-07-12 13:43:48,160: llmtf.base.nlpcoreteam/ruMMLU: {'acc': 0.5956063721324766}
182
+ INFO: 2024-07-12 13:43:48,192: llmtf.base.evaluator: Ended eval
183
+ INFO: 2024-07-12 13:43:48,197: llmtf.base.evaluator:
184
+ mean darumeru/cp_sent_ru darumeru/ruMMLU nlpcoreteam/enMMLU nlpcoreteam/ruMMLU
185
+ 0.717 0.993 0.576 0.702 0.596
186
+ INFO: 2024-07-12 13:44:07,504: llmtf.base.darumeru/MultiQ: Processing Dataset: 702.63s
187
+ INFO: 2024-07-12 13:44:07,507: llmtf.base.darumeru/MultiQ: Results for darumeru/MultiQ:
188
+ INFO: 2024-07-12 13:44:07,511: llmtf.base.darumeru/MultiQ: {'f1': 0.23082049162131107, 'em': 0.13288718929254303}
189
+ INFO: 2024-07-12 13:44:07,516: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
190
+ INFO: 2024-07-12 13:44:07,516: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
191
+ INFO: 2024-07-12 13:44:09,506: llmtf.base.darumeru/PARus: Loading Dataset: 1.99s
192
+ INFO: 2024-07-12 13:44:15,843: llmtf.base.darumeru/PARus: Processing Dataset: 6.34s
193
+ INFO: 2024-07-12 13:44:15,844: llmtf.base.darumeru/PARus: Results for darumeru/PARus:
194
+ INFO: 2024-07-12 13:44:15,857: llmtf.base.darumeru/PARus: {'acc': 0.79}
195
+ INFO: 2024-07-12 13:44:15,858: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
196
+ INFO: 2024-07-12 13:44:15,858: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
197
+ INFO: 2024-07-12 13:44:17,890: llmtf.base.darumeru/RCB: Loading Dataset: 2.03s
198
+ INFO: 2024-07-12 13:44:26,535: llmtf.base.darumeru/RCB: Processing Dataset: 8.64s
199
+ INFO: 2024-07-12 13:44:26,550: llmtf.base.darumeru/RCB: Results for darumeru/RCB:
200
+ INFO: 2024-07-12 13:44:26,556: llmtf.base.darumeru/RCB: {'acc': 0.5181818181818182, 'f1_macro': 0.4329109928238468}
201
+ INFO: 2024-07-12 13:44:26,557: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
202
+ INFO: 2024-07-12 13:44:26,557: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
203
+ INFO: 2024-07-12 13:44:29,227: llmtf.base.darumeru/ruOpenBookQA: Loading Dataset: 2.67s
204
+ INFO: 2024-07-12 13:45:43,981: llmtf.base.darumeru/ruOpenBookQA: Processing Dataset: 74.75s
205
+ INFO: 2024-07-12 13:45:43,996: llmtf.base.darumeru/ruOpenBookQA: Results for darumeru/ruOpenBookQA:
206
+ INFO: 2024-07-12 13:45:44,008: llmtf.base.darumeru/ruOpenBookQA: {'acc': 0.8178694158075601, 'f1_macro': 0.8177749093697874}
207
+ INFO: 2024-07-12 13:45:44,015: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
208
+ INFO: 2024-07-12 13:45:44,015: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
209
+ INFO: 2024-07-12 13:45:51,418: llmtf.base.darumeru/ruTiE: Loading Dataset: 7.40s
210
+ INFO: 2024-07-12 13:50:12,621: llmtf.base.darumeru/ruTiE: Processing Dataset: 261.20s
211
+ INFO: 2024-07-12 13:50:12,624: llmtf.base.darumeru/ruTiE: Results for darumeru/ruTiE:
212
+ INFO: 2024-07-12 13:50:12,657: llmtf.base.darumeru/ruTiE: {'acc': 0.6348837209302326}
213
+ INFO: 2024-07-12 13:50:12,660: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
214
+ INFO: 2024-07-12 13:50:12,661: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
215
+ INFO: 2024-07-12 13:50:15,001: llmtf.base.darumeru/ruWorldTree: Loading Dataset: 2.34s
216
+ INFO: 2024-07-12 13:50:18,607: llmtf.base.darumeru/ruWorldTree: Processing Dataset: 3.60s
217
+ INFO: 2024-07-12 13:50:18,609: llmtf.base.darumeru/ruWorldTree: Results for darumeru/ruWorldTree:
218
+ INFO: 2024-07-12 13:50:18,614: llmtf.base.darumeru/ruWorldTree: {'acc': 0.9047619047619048, 'f1_macro': 0.9043845651108326}
219
+ INFO: 2024-07-12 13:50:18,615: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
220
+ INFO: 2024-07-12 13:50:18,615: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
221
+ INFO: 2024-07-12 13:50:20,678: llmtf.base.darumeru/RWSD: Loading Dataset: 2.06s
222
+ INFO: 2024-07-12 13:50:28,821: llmtf.base.darumeru/RWSD: Processing Dataset: 8.14s
223
+ INFO: 2024-07-12 13:50:28,823: llmtf.base.darumeru/RWSD: Results for darumeru/RWSD:
224
+ INFO: 2024-07-12 13:50:28,827: llmtf.base.darumeru/RWSD: {'acc': 0.6029411764705882}
225
+ INFO: 2024-07-12 13:50:28,828: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
226
+ INFO: 2024-07-12 13:50:28,828: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
227
+ INFO: 2024-07-12 13:50:31,849: llmtf.base.darumeru/USE: Loading Dataset: 3.02s
228
+ INFO: 2024-07-12 13:51:35,478: llmtf.base.darumeru/cp_sent_en: Processing Dataset: 536.50s
229
+ INFO: 2024-07-12 13:51:35,480: llmtf.base.darumeru/cp_sent_en: Results for darumeru/cp_sent_en:
230
+ INFO: 2024-07-12 13:51:35,484: llmtf.base.darumeru/cp_sent_en: {'symbol_per_token': 4.31579408268585, 'len': 0.8895181009295375, 'lcs': 0.8825764242763656}
231
+ INFO: 2024-07-12 13:51:35,486: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
232
+ INFO: 2024-07-12 13:51:35,486: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
233
+ INFO: 2024-07-12 13:51:37,358: llmtf.base.darumeru/cp_para_ru: Loading Dataset: 1.87s
234
+ INFO: 2024-07-12 13:55:22,257: llmtf.base.darumeru/USE: Processing Dataset: 290.41s
235
+ INFO: 2024-07-12 13:55:22,261: llmtf.base.darumeru/USE: Results for darumeru/USE:
236
+ INFO: 2024-07-12 13:55:22,266: llmtf.base.darumeru/USE: {'grade_norm': 0.153921568627451}
237
+ INFO: 2024-07-12 13:55:22,269: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645]
238
+ INFO: 2024-07-12 13:55:22,269: llmtf.base.hfmodel: Updated generation_config.stop_strings: []
239
+ INFO: 2024-07-12 13:55:26,569: llmtf.base.russiannlp/rucola_custom: Loading Dataset: 4.30s
240
+ INFO: 2024-07-12 13:56:58,317: llmtf.base.russiannlp/rucola_custom: Processing Dataset: 91.75s
241
+ INFO: 2024-07-12 13:56:58,322: llmtf.base.russiannlp/rucola_custom: Results for russiannlp/rucola_custom:
242
+ INFO: 2024-07-12 13:56:58,334: llmtf.base.russiannlp/rucola_custom: {'acc': 0.7072120559741658, 'mcc': 0.23883340451741628}
243
+ INFO: 2024-07-12 13:56:58,338: llmtf.base.evaluator: Ended eval
244
+ INFO: 2024-07-12 13:56:58,365: llmtf.base.evaluator:
245
+ mean darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
246
+ 0.628 0.182 0.790 0.476 0.603 0.154 0.890 0.993 0.576 0.818 0.635 0.905 0.702 0.596 0.473
247
+ INFO: 2024-07-12 13:58:57,546: llmtf.base.daru/treewayextractive: Processing Dataset: 1575.85s
248
+ INFO: 2024-07-12 13:58:57,550: llmtf.base.daru/treewayextractive: Results for daru/treewayextractive:
249
+ INFO: 2024-07-12 13:58:57,798: llmtf.base.daru/treewayextractive: {'r-prec': 0.3917012265512266}
250
+ INFO: 2024-07-12 13:58:58,347: llmtf.base.evaluator: Ended eval
251
+ INFO: 2024-07-12 13:58:58,375: llmtf.base.evaluator:
252
+ mean daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
253
+ 0.612 0.392 0.182 0.790 0.476 0.603 0.154 0.890 0.993 0.576 0.818 0.635 0.905 0.702 0.596 0.473
254
+ INFO: 2024-07-12 14:04:59,106: llmtf.base.darumeru/cp_para_ru: Processing Dataset: 801.75s
255
+ INFO: 2024-07-12 14:04:59,122: llmtf.base.darumeru/cp_para_ru: Results for darumeru/cp_para_ru:
256
+ INFO: 2024-07-12 14:04:59,126: llmtf.base.darumeru/cp_para_ru: {'symbol_per_token': 2.654953066881009, 'len': 0.9876165527469303, 'lcs': 0.8268038045788856}
257
+ INFO: 2024-07-12 14:04:59,127: llmtf.base.hfmodel: Updated generation_config.eos_token_id: [151645, 198, 271]
258
+ INFO: 2024-07-12 14:04:59,127: llmtf.base.hfmodel: Updated generation_config.stop_strings: ['\n', '\n\n']
259
+ INFO: 2024-07-12 14:05:01,461: llmtf.base.darumeru/cp_para_en: Loading Dataset: 2.33s
260
+ INFO: 2024-07-12 14:14:59,808: llmtf.base.darumeru/cp_para_en: Processing Dataset: 598.35s
261
+ INFO: 2024-07-12 14:14:59,811: llmtf.base.darumeru/cp_para_en: Results for darumeru/cp_para_en:
262
+ INFO: 2024-07-12 14:14:59,832: llmtf.base.darumeru/cp_para_en: {'symbol_per_token': 4.353034074577473, 'len': 0.9802575114789726, 'lcs': 0.9058441645642358}
263
+ INFO: 2024-07-12 14:14:59,833: llmtf.base.evaluator: Ended eval
264
+ INFO: 2024-07-12 14:14:59,844: llmtf.base.evaluator:
265
+ mean daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
266
+ 0.642 0.392 0.182 0.790 0.476 0.603 0.154 0.906 0.827 0.890 0.993 0.576 0.818 0.635 0.905 0.702 0.596 0.473
267
+ INFO: 2024-07-12 14:37:23,690: llmtf.base.daru/treewayabstractive: Processing Dataset: 3892.16s
268
+ INFO: 2024-07-12 14:37:23,708: llmtf.base.daru/treewayabstractive: Results for daru/treewayabstractive:
269
+ INFO: 2024-07-12 14:37:23,718: llmtf.base.daru/treewayabstractive: {'rouge1': 0.2813082731366044, 'rouge2': 0.10129515826302848}
270
+ INFO: 2024-07-12 14:37:23,720: llmtf.base.evaluator: Ended eval
271
+ INFO: 2024-07-12 14:37:23,733: llmtf.base.evaluator:
272
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
273
+ 0.617 0.191 0.392 0.182 0.790 0.476 0.603 0.154 0.906 0.827 0.890 0.993 0.576 0.818 0.635 0.905 0.702 0.596 0.473
llmtf_eval_k0/evaluation_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ mean daru/treewayabstractive daru/treewayextractive darumeru/MultiQ darumeru/PARus darumeru/RCB darumeru/RWSD darumeru/USE darumeru/cp_para_en darumeru/cp_para_ru darumeru/cp_sent_en darumeru/cp_sent_ru darumeru/ruMMLU darumeru/ruOpenBookQA darumeru/ruTiE darumeru/ruWorldTree nlpcoreteam/enMMLU nlpcoreteam/ruMMLU russiannlp/rucola_custom
2
+ 0.617 0.191 0.392 0.182 0.790 0.476 0.603 0.154 0.906 0.827 0.890 0.993 0.576 0.818 0.635 0.905 0.702 0.596 0.473
llmtf_eval_k0/nlpcoreteam_enMMLU.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71000a11ac35e74385e8cbeb590e2ffe89f4803f551c6067edb7fceb2acaa051
3
+ size 37162610
llmtf_eval_k0/nlpcoreteam_enMMLU_params.jsonl ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "custom_generation_config": null,
3
+ "model_params": {
4
+ "model_name_or_path": "Qwen/Qwen2-7B-Instruct",
5
+ "generation_config": {
6
+ "do_sample": true,
7
+ "eos_token_id": [
8
+ 151645
9
+ ],
10
+ "max_length": 32768,
11
+ "max_new_tokens": 64,
12
+ "pad_token_id": 151643,
13
+ "stop_strings": [],
14
+ "temperature": 0.1,
15
+ "top_k": 40,
16
+ "top_p": 0.9,
17
+ "transformers_version": "4.38.2",
18
+ "trust_remote_code": [
19
+ false
20
+ ]
21
+ },
22
+ "conversation_template": {
23
+ "system_prompt": "",
24
+ "system_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
25
+ "user_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
26
+ "bot_message_template": "<|im_start|>{role}\n{content}<|im_end|>\n",
27
+ "bot_message_template_incomplete": "<|im_start|>{role}\n{content}",
28
+ "user_role": "user",
29
+ "bot_role": "assistant",
30
+ "system_role": "system",
31
+ "global_prefix": "",
32
+ "suffix": "<|im_start|>assistant\n",
33
+ "add_special_tokens": false,
34
+ "eos_token": "<|im_end|>"
35
+ },
36
+ "load_in_8bit": false,
37
+ "torch_dtype": "auto",
38
+ "use_flash_attention_2": true,
39
+ "device_map": "cuda:0",
40
+ "use_fast_tokenizer": true,
41
+ "leading_space": false,
42
+ "space_token": null,
43
+ "trust_remote_code": [
44
+ false
45
+ ],
46
+ "max_model_len": 32768
47
+ },
48
+ "task_params": {
49
+ "max_len": 4000,
50
+ "few_shot_count": 0,
51
+ "batch_size": 1,
52
+ "max_sample_per_dataset": 10000000000000,
53
+ "method": "calculate_tokens_proba"
54
+ }
55
+ }