Update leaderboard_data.jsonl
Browse files- leaderboard_data.jsonl +27 -43
leaderboard_data.jsonl
CHANGED
@@ -1,43 +1,27 @@
|
|
1 |
-
{"Model": "
|
2 |
-
{"Model":"
|
3 |
-
{"Model":"
|
4 |
-
{"Model":"
|
5 |
-
{"Model":"
|
6 |
-
{"Model":"
|
7 |
-
{"Model":"
|
8 |
-
{"Model":"
|
9 |
-
{"Model":"
|
10 |
-
{"Model":"
|
11 |
-
{"Model":"
|
12 |
-
{"Model":"google/gemma-2-2b-it", "Precision": "bfloat16", "#Params (B)": 2.61, "Part Multiple Choice": 31.12, "ARC Easy": 71.26, "ARC Challenge": 57.72, "MMLU Pro": 16.23, "AUT Multiple Choice Persian": 49.
|
13 |
-
{"Model":"
|
14 |
-
{"Model":"google/gemma-2-
|
15 |
-
{"Model":"
|
16 |
-
{"Model":"MaralGPT/Maral-7B-alpha-1", "Precision": "bfloat16", "#Params (B)": 7.24, "Part Multiple Choice": 26.67, "ARC Easy": 44.54, "ARC Challenge": 30.87, "MMLU Pro": 15.99, "AUT Multiple Choice Persian": 36.09, "Hub License": "mit", "Model sha": "
|
17 |
-
{"Model":"
|
18 |
-
{"Model":"
|
19 |
-
{"Model":"
|
20 |
-
{"Model":"
|
21 |
-
{"Model":"
|
22 |
-
{"Model":"
|
23 |
-
{"Model":"
|
24 |
-
{"Model":"
|
25 |
-
{"Model":"
|
26 |
-
{"Model":"meta-llama/Llama-3.
|
27 |
-
{"Model":"
|
28 |
-
{"Model":"meta-llama/Llama-3.1-70B-Instruct", "Precision": "bfloat16", "#Params (B)": 70.7, "Part Multiple Choice": 51.53, "ARC Easy": 94.54, "ARC Challenge": 87.24, "MMLU Pro": 43.19, "AUT Multiple Choice Persian": 70.41, "Hub License": "llama3.1", "Model sha": "main", "model_name_for_query": "meta-llama/Llama-3.1-70B-Instruct"}
|
29 |
-
{"Model":"Qwen/Qwen2-72B-Instruct", "Precision": "bfloat16", "#Params (B)": 72.7, "Part Multiple Choice": 47.10, "ARC Easy": 94.25, "ARC Challenge": 87.24, "MMLU Pro": 40.33, "AUT Multiple Choice Persian": 62.91, "Hub License": "tongyi-qianwen", "Model sha": "main", "model_name_for_query": "Qwen/Qwen2-72B-Instruct"}
|
30 |
-
{"Model":"mistralai/Mistral-Large-Instruct-2407", "Precision": "bfloat16", "#Params (B)": 123, "Part Multiple Choice": 47.47, "ARC Easy": 95.68, "ARC Challenge": 87.91, "MMLU Pro": 44.63, "AUT Multiple Choice Persian": 68.04, "Hub License": "mrl", "Model sha": "main", "model_name_for_query": "mistralai/Mistral-Large-Instruct-2407"}
|
31 |
-
{"Model":"PartAI/Llama-3.2-1B-checkpoint5001", "Precision": "bfloat16", "#Params (B)": 1.24, "Part Multiple Choice": 25.43, "ARC Easy": 21.55, "ARC Challenge": 22.81, "MMLU Pro": 13.84, "AUT Multiple Choice Persian": 24.65, "Hub License": "llama3.2", "Model sha": "main", "model_name_for_query": "PartAI/Llama-3.2-1B-checkpoint5001"}
|
32 |
-
{"Model":"PartAI/Llama-3.2-1B-checkpoint-820", "Precision": "bfloat16", "#Params (B)": 1.24, "Part Multiple Choice": 25.29, "ARC Easy": 23.85, "ARC Challenge" : 28.85, "MMLU Pro": 14.08, "AUT Multiple Choice Persian": 23.47, "Hub License": "llama3.2", "Model sha": "main", "model_name_for_query": "PartAI/Llama-3.2-1B-checkpoint-820"}
|
33 |
-
{"Model":"microsoft/Phi-3-mini-4k-instruct", "Precision": "bfloat16", "#Params (B)": 3.82, "Part Multiple Choice": 27.37, "ARC Easy": 36.78, "ARC Challenge": 32.88, "MMLU Pro": 17.89, "AUT Multiple Choice Persian": 35.10, "Hub License": "mit", "Model sha": "main", "model_name_for_query": "microsoft/Phi-3-mini-4k-instruct"}
|
34 |
-
{"Model":"microsoft/Phi-3-mini-4k-instruct-akhlaghi", "Precision": "bfloat16", "#Params (B)": 3.85, "Part Multiple Choice": 30.56, "ARC Easy": 64.65, "ARC Challenge": 51.00, "MMLU Pro": 17.18, "AUT Multiple Choice Persian": 43.98, "Hub License": "mit", "Model sha": "main", "model_name_for_query": "microsoft/Phi-3-mini-4k-instruct-akhlaghi"}
|
35 |
-
{"Model":"CohereForAI/c4ai-command-r7b-12-2024", "Precision": "float16", "#Params (B)": 8.03, "Part Multiple Choice": 34.03, "ARC Easy": 77.01, "ARC Challenge": 66.44, "MMLU Pro": 23.62, "AUT Multiple Choice Persian": 50.49, "Hub License": "cc-by-nc-4.0", "Model sha": "main", "model_name_for_query": "CohereForAI/c4ai-command-r7b-12-2024"}
|
36 |
-
{"Model":"PartAI/Llama3.2-1B-Part-v1", "Precision": "bfloat16", "#Params (B)": 1.24, "Part Multiple Choice": 0, "ARC Easy": 22.70, "ARC Challenge" : 25.50, "MMLU Pro": 7.15, "AUT Multiple Choice Persian": 25.24, "Hub License": "llama3.2", "Model sha": "main", "model_name_for_query": "PartAI/Llama3.2-1B-Part-v1"}
|
37 |
-
{"Model":"CohereForAI/c4ai-command-a-03-2025", "Precision": "bfloat16", "#Params (B)": 111, "Part Multiple Choice": 49.49, "ARC Easy": 96.55, "ARC Challenge": 86.57, "MMLU Pro": 43.43, "AUT Multiple Choice Persian": 70.21, "Hub License": "cc-by-nc-4.0", "Model sha": "main", "model_name_for_query": "CohereForAI/c4ai-command-a-03-2025"}
|
38 |
-
{"Model":"meta-llama/Llama-4-Scout-17B-16E-Instruct", "Precision": "bfloat16", "#Params (B)": 109, "Part Multiple Choice": 53.75, "ARC Easy": 94.83, "ARC Challenge": 88.59, "MMLU Pro": 39.14, "AUT Multiple Choice Persian": 66.86, "Hub License": "llama4", "Model sha": "main", "model_name_for_query": "meta-llama/Llama-4-Scout-17B-16E-Instruct"}
|
39 |
-
{"Model":"google/gemma-3-1b-it", "Precision": "bfloat16", "#Params (B)": 1, "Part Multiple Choice": 27.15, "ARC Easy": 41.67, "ARC Challenge": 31.54, "MMLU Pro": 16.23, "AUT Multiple Choice Persian": 36.88, "Hub License": "gemma", "Model sha": "main", "model_name_for_query": "google/gemma-3-1b-it"}
|
40 |
-
{"Model":"google/gemma-3-4b-it", "Precision": "bfloat16", "#Params (B)": 4.3, "Part Multiple Choice": 34.40, "ARC Easy": 77.01, "ARC Challenge": 63.76, "MMLU Pro": 19.81, "AUT Multiple Choice Persian": 50.30, "Hub License": "gemma", "Model sha": "main", "model_name_for_query": "google/gemma-3-4b-it"}
|
41 |
-
{"Model":"google/gemma-3-12b-it", "Precision": "bfloat16", "#Params (B)": 12.2, "Part Multiple Choice": 43.52, "ARC Easy": 93.39, "ARC Challenge": 81.21, "MMLU Pro": 29.36, "AUT Multiple Choice Persian": 57.00, "Hub License": "gemma", "Model sha": "main", "model_name_for_query": "google/gemma-3-12b-it"}
|
42 |
-
{"Model":"google/gemma-3-27b-it", "Precision": "bfloat16", "#Params (B)": 27.4, "Part Multiple Choice": 48.56, "ARC Easy": 95.69, "ARC Challenge": 90.60, "MMLU Pro": 40.10, "AUT Multiple Choice Persian": 64.30, "Hub License": "gemma", "Model sha": "main", "model_name_for_query": "google/gemma-3-27b-it"}
|
43 |
-
{"Model":"Qwen/QwQ-32B", "Precision": "bfloat16", "#Params (B)": 32.8, "Part Multiple Choice": 46.48, "ARC Easy": 89.37, "ARC Challenge": 82.55, "MMLU Pro": 35.32, "AUT Multiple Choice Persian": 57.40, "Hub License": "apache-2.0", "Model sha": "main", "model_name_for_query": "Qwen/QwQ-32B"}
|
|
|
1 |
+
{"Model": "meta-llama/Llama-3.2-1B-Instruct", "Precision": "bfloat16", "#Params (B)": 1.24, "Part Multiple Choice": 28.28, "ARC Easy": 47.1, "ARC Challenge": 39.0, "MMLU Pro": 12.17, "AUT Multiple Choice Persian": 36.88, "Hub License": "llama3.2", "Model sha": "9213176726f574b556790deb65791e0c5aa438b6", "model_name_for_query": "meta-llama/Llama-3.2-1B-Instruct"}
|
2 |
+
{"Model": "PartAI/Dorna2-Llama3.1-8B-Instruct", "Precision": "bfloat16", "#Params (B)": 8.03, "Part Multiple Choice": 34.48, "ARC Easy": 79.59, "ARC Challenge": 64.42, "MMLU Pro": 21.47, "AUT Multiple Choice Persian": 53.64, "Hub License": "llama3.1", "Model sha": "b78e4bd261100c96e511ed5090ca0ce0e1f4b340", "model_name_for_query": "PartAI/Dorna2-Llama3.1-8B-Instruct"}
|
3 |
+
{"Model": "Qwen/Qwen2.5-7B-Instruct", "Precision": "bfloat16", "#Params (B)": 7.62, "Part Multiple Choice": 36.72, "ARC Easy": 79.02, "ARC Challenge": 69.13, "MMLU Pro": 21.96, "AUT Multiple Choice Persian": 52.66, "Hub License": "apache-2.0", "Model sha": "a09a35458c702b33eeacc393d103063234e8bc28", "model_name_for_query": "Qwen/Qwen2.5-7B-Instruct"}
|
4 |
+
{"Model": "CohereForAI/c4ai-command-r7b-12-2024", "Precision": "float16", "#Params (B)": 8.03, "Part Multiple Choice": 34.03, "ARC Easy": 77.01, "ARC Challenge": 66.44, "MMLU Pro": 23.62, "AUT Multiple Choice Persian": 50.49, "Hub License": "cc-by-nc-4.0", "Model sha": "ff3e3c9c990d8d7576a4f8fa839281e11ebabc09", "model_name_for_query": "CohereForAI/c4ai-command-r7b-12-2024"}
|
5 |
+
{"Model": "google/gemma-3-4b-it", "Precision": "bfloat16", "#Params (B)": 4.3, "Part Multiple Choice": 34.4, "ARC Easy": 77.01, "ARC Challenge": 63.76, "MMLU Pro": 19.81, "AUT Multiple Choice Persian": 50.3, "Hub License": "gemma", "Model sha": "093f9f388b31de276ce2de164bdc2081324b9767", "model_name_for_query": "google/gemma-3-4b-it"}
|
6 |
+
{"Model": "Qwen/Qwen2-7B-Instruct", "Precision": "bfloat16", "#Params (B)": 7.62, "Part Multiple Choice": 35.9, "ARC Easy": 77.3, "ARC Challenge": 68.46, "MMLU Pro": 23.87, "AUT Multiple Choice Persian": 51.68, "Hub License": "apache-2.0", "Model sha": "f2826a00ceef68f0f2b946d945ecc0477ce4450c", "model_name_for_query": "Qwen/Qwen2-7B-Instruct"}
|
7 |
+
{"Model": "meta-llama/Meta-Llama-3-8B-Instruct", "Precision": "bfloat16", "#Params (B)": 8.03, "Part Multiple Choice": 34.99, "ARC Easy": 72.9, "ARC Challenge": 57.7, "MMLU Pro": 25.54, "AUT Multiple Choice Persian": 53.85, "Hub License": "llama3", "Model sha": "5f0b02c75b57c5855da9ae460ce51323ea669d8a", "model_name_for_query": "meta-llama/Meta-Llama-3-8B-Instruct"}
|
8 |
+
{"Model": "NousResearch/Hermes-3-Llama-3.1-8B", "Precision": "bfloat16", "#Params (B)": 8.03, "Part Multiple Choice": 35.01, "ARC Easy": 77.01, "ARC Challenge": 58.39, "MMLU Pro": 21.0, "AUT Multiple Choice Persian": 52.46, "Hub License": "llama3", "Model sha": "896ea440e5a9e6070e3d8a2774daf2b481ab425b", "model_name_for_query": "NousResearch/Hermes-3-Llama-3.1-8B"}
|
9 |
+
{"Model": "CohereForAI/aya-expanse-32b", "Precision": "float16", "#Params (B)": 32.3, "Part Multiple Choice": 43.36, "ARC Easy": 93.1, "ARC Challenge": 79.87, "MMLU Pro": 31.03, "AUT Multiple Choice Persian": 62.33, "Hub License": "cc-by-nc-4.0", "Model sha": "94bda1dcb97d260f732d230b832c7c685ae91e23", "model_name_for_query": "CohereForAI/aya-expanse-32b"}
|
10 |
+
{"Model": "CohereForAI/aya-23-8B", "Precision": "float16", "#Params (B)": 8.03, "Part Multiple Choice": 32.82, "ARC Easy": 80.46, "ARC Challenge": 64.43, "MMLU Pro": 18.62, "AUT Multiple Choice Persian": 52.86, "Hub License": "cc-by-nc-4.0", "Model sha": "2a1a63b24af8f591616fdf58936ee576d63ca835", "model_name_for_query": "CohereForAI/aya-23-8B"}
|
11 |
+
{"Model": "meta-llama/Llama-3.1-8B-Instruct", "Precision": "bfloat16", "#Params (B)": 8.03, "Part Multiple Choice": 36.68, "ARC Easy": 78.4, "ARC Challenge": 60.4, "MMLU Pro": 21.0, "AUT Multiple Choice Persian": 54.24, "Hub License": "llama3.1", "Model sha": "0e9e39f249a16976918f6564b8830bc894c89659", "model_name_for_query": "meta-llama/Llama-3.1-8B-Instruct"}
|
12 |
+
{"Model": "google/gemma-2-2b-it", "Precision": "bfloat16", "#Params (B)": 2.61, "Part Multiple Choice": 31.12, "ARC Easy": 71.26, "ARC Challenge": 57.72, "MMLU Pro": 16.23, "AUT Multiple Choice Persian": 49.9, "Hub License": "gemma", "Model sha": "299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", "model_name_for_query": "google/gemma-2-2b-it"}
|
13 |
+
{"Model": "meta-llama/Meta-Llama-3-8B", "Precision": "bfloat16", "#Params (B)": 8.03, "Part Multiple Choice": 34.32, "ARC Easy": 72.12, "ARC Challenge": 56.37, "MMLU Pro": 19.33, "AUT Multiple Choice Persian": 48.32, "Hub License": "llama3", "Model sha": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", "model_name_for_query": "meta-llama/Meta-Llama-3-8B"}
|
14 |
+
{"Model": "google/gemma-2-9b-it", "Precision": "bfloat16", "#Params (B)": 9.24, "Part Multiple Choice": 42.7, "ARC Easy": 93.1, "ARC Challenge": 84.56, "MMLU Pro": 31.74, "AUT Multiple Choice Persian": 62.33, "Hub License": "gemma", "Model sha": "11c9b309abf73637e4b6f9a3fa1e92e615547819", "model_name_for_query": "google/gemma-2-9b-it"}
|
15 |
+
{"Model": "PartAI/Dorna-Llama3-8B-Instruct", "Precision": "bfloat16", "#Params (B)": 8.03, "Part Multiple Choice": 33.88, "ARC Easy": 70.4, "ARC Challenge": 61.07, "MMLU Pro": 23.39, "AUT Multiple Choice Persian": 52.86, "Hub License": "llama3", "Model sha": "fb268bb51b950b4db5b7c82c1b73d9e803020eed", "model_name_for_query": "PartAI/Dorna-Llama3-8B-Instruct"}
|
16 |
+
{"Model": "MaralGPT/Maral-7B-alpha-1", "Precision": "bfloat16", "#Params (B)": 7.24, "Part Multiple Choice": 26.67, "ARC Easy": 44.54, "ARC Challenge": 30.87, "MMLU Pro": 15.99, "AUT Multiple Choice Persian": 36.09, "Hub License": "mit", "Model sha": "2ab5ca2a0d1a4454a78b4ca911e595bb9da2fe2f", "model_name_for_query": "MaralGPT/Maral-7B-alpha-1"}
|
17 |
+
{"Model": "Qwen/QwQ-32B-Preview", "Precision": "bfloat16", "#Params (B)": 32.8, "Part Multiple Choice": 46.64, "ARC Easy": 91.95, "ARC Challenge": 87.24, "MMLU Pro": 37.94, "AUT Multiple Choice Persian": 60.15, "Hub License": "apache-2.0", "Model sha": "91906fe41a48b6a89ce2970abfd1269eefee170e", "model_name_for_query": "Qwen/QwQ-32B-Preview"}
|
18 |
+
{"Model": "Qwen/QwQ-32B", "Precision": "bfloat16", "#Params (B)": 32.8, "Part Multiple Choice": 46.48, "ARC Easy": 89.37, "ARC Challenge": 82.55, "MMLU Pro": 35.32, "AUT Multiple Choice Persian": 57.4, "Hub License": "apache-2.0", "Model sha": "976055f8c83f394f35dbd3ab09a285a984907bd0", "model_name_for_query": "Qwen/QwQ-32B"}
|
19 |
+
{"Model": "CohereForAI/aya-23-35B", "Precision": "float16", "#Params (B)": 35, "Part Multiple Choice": 36.79, "ARC Easy": 87.93, "ARC Challenge": 72.48, "MMLU Pro": 25.54, "AUT Multiple Choice Persian": 61.14, "Hub License": "cc-by-nc-4.0", "Model sha": "5e72bd5ad83e5e1612ee7f56a0c1a439a7cfb887", "model_name_for_query": "CohereForAI/aya-23-35B"}
|
20 |
+
{"Model": "google/gemma-3-1b-it", "Precision": "bfloat16", "#Params (B)": 1, "Part Multiple Choice": 27.15, "ARC Easy": 41.67, "ARC Challenge": 31.54, "MMLU Pro": 16.23, "AUT Multiple Choice Persian": 36.88, "Hub License": "gemma", "Model sha": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", "model_name_for_query": "google/gemma-3-1b-it"}
|
21 |
+
{"Model": "CohereForAI/aya-expanse-8b", "Precision": "float16", "#Params (B)": 8.03, "Part Multiple Choice": 34.91, "ARC Easy": 79.6, "ARC Challenge": 70.47, "MMLU Pro": 25.06, "AUT Multiple Choice Persian": 58.38, "Hub License": "cc-by-nc-4.0", "Model sha": "0ad43ec1e309e1351faa4b1d22713c065e37359a", "model_name_for_query": "CohereForAI/aya-expanse-8b"}
|
22 |
+
{"Model": "google/gemma-2-27b-it", "Precision": "bfloat16", "#Params (B)": 27.2, "Part Multiple Choice": 46.03, "ARC Easy": 95.98, "ARC Challenge": 85.91, "MMLU Pro": 36.28, "AUT Multiple Choice Persian": 63.12, "Hub License": "gemma", "Model sha": "aaf20e6b9f4c0fcf043f6fb2a2068419086d77b0", "model_name_for_query": "google/gemma-2-27b-it"}
|
23 |
+
{"Model": "Qwen/Qwen2.5-32B-Instruct", "Precision": "bfloat16", "#Params (B)": 32.8, "Part Multiple Choice": 46.06, "ARC Easy": 90.8, "ARC Challenge": 85.91, "MMLU Pro": 38.19, "AUT Multiple Choice Persian": 61.34, "Hub License": "apache-2.0", "Model sha": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", "model_name_for_query": "Qwen/Qwen2.5-32B-Instruct"}
|
24 |
+
{"Model": "universitytehran/PersianMind-v1.0", "Precision": "bfloat16", "#Params (B)": 6.82, "Part Multiple Choice": 29.27, "ARC Easy": 58.91, "ARC Challenge": 48.32, "MMLU Pro": 15.51, "AUT Multiple Choice Persian": 45.36, "Hub License": "cc-by-nc-sa-4.0", "Model sha": "af603eeb074138e2a613fbc95d89f018afbd3041", "model_name_for_query": "universitytehran/PersianMind-v1.0"}
|
25 |
+
{"Model": "google/gemma-3-27b-it", "Precision": "bfloat16", "#Params (B)": 27.4, "Part Multiple Choice": 48.56, "ARC Easy": 95.69, "ARC Challenge": 90.6, "MMLU Pro": 40.1, "AUT Multiple Choice Persian": 64.3, "Hub License": "gemma", "Model sha": "005ad3404e59d6023443cb575daa05336842228a", "model_name_for_query": "google/gemma-3-27b-it"}
|
26 |
+
{"Model": "meta-llama/Llama-3.1-8B", "Precision": "bfloat16", "#Params (B)": 8.03, "Part Multiple Choice": 35.52, "ARC Easy": 75.28, "ARC Challenge": 53.02, "MMLU Pro": 24.1, "AUT Multiple Choice Persian": 53.45, "Hub License": "llama3.1", "Model sha": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", "model_name_for_query": "meta-llama/Llama-3.1-8B"}
|
27 |
+
{"Model": "google/gemma-3-12b-it", "Precision": "bfloat16", "#Params (B)": 12.2, "Part Multiple Choice": 43.52, "ARC Easy": 93.39, "ARC Challenge": 81.21, "MMLU Pro": 29.36, "AUT Multiple Choice Persian": 57.0, "Hub License": "gemma", "Model sha": "96b6f1eccf38110c56df3a15bffe176da04bfd80", "model_name_for_query": "google/gemma-3-12b-it"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|