Update leaderboard_data.jsonl
leaderboard_data.jsonl CHANGED (+1 -1)
@@ -34,7 +34,7 @@
{"Model": "Meta-Llama-3-8B", "model_name_for_query": "meta-llama/Meta-Llama-3-8B", "GeneralKnowledge": 47.7, "GSM8K": 10.3, "DC-Homograph": 41.67, "MC-Homograph": 74.42, "PiQA": 64.16, "Proverb-Quiz": 37.3, "VerbEval": 39.46, "Winogrande": 55.36, "Arc-Challenge": 62.07, "Arc-Easy": 75.83, "Feqh": 27.43, "Hallucination (Truthfulness)": 37.0, "P-Hellaswag": 76.49, "Law": 35.67, "AUT Multiple Choice": 42.5, "Parsi Literature": 28.19, "BoolQA": 75.2, "Reading Comprehension": 19.5, "PartExpert": 35.1, "MMLU Pro": 22.8, "Iranian Social Norms": 54.02, "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", "Hub License": "llama3"}
{"Model": "Dorna-Llama3-8B-Instruct", "model_name_for_query": "PartAI/Dorna-Llama3-8B-Instruct", "GeneralKnowledge": 41.33, "GSM8K": 10.3, "DC-Homograph": 40.74, "MC-Homograph": 74.65, "PiQA": 66.17, "Proverb-Quiz": 35.41, "VerbEval": 34.74, "Winogrande": 56.16, "Arc-Challenge": 59.94, "Arc-Easy": 70.7, "Feqh": 29.14, "Hallucination (Truthfulness)": 31.49, "P-Hellaswag": 75.68, "Law": 25.33, "AUT Multiple Choice": 36.9, "Parsi Literature": 27.54, "BoolQA": 80.1, "Reading Comprehension": 21.8, "PartExpert": 34.49, "MMLU Pro": 22.0, "Iranian Social Norms": 69.39, "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "fb268bb51b950b4db5b7c82c1b73d9e803020eed", "Hub License": "llama3"}
{"Model": "gemma-2-2b-it", "model_name_for_query": "google/gemma-2-2b-it", "GeneralKnowledge": 32.91, "GSM8K": 6.4, "DC-Homograph": 47.22, "MC-Homograph": 74.65, "PiQA": 66.87, "Proverb-Quiz": 45.68, "VerbEval": 36.18, "Winogrande": 54.74, "Arc-Challenge": 57.91, "Arc-Easy": 70.48, "Feqh": 25.71, "Hallucination (Truthfulness)": 39.02, "P-Hellaswag": 69.88, "Law": 32.67, "AUT Multiple Choice": 36.9, "Parsi Literature": 30.76, "BoolQA": 72.4, "Reading Comprehension": 0.3, "PartExpert": 31.31, "MMLU Pro": 18.2, "Iranian Social Norms": 40.18, "#Params (B)": 2.61, "Precision": "BF16", "Model sha": "299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", "Hub License": "gemma"}
-{"Model": "PersianMind-v1.0", "model_name_for_query": "universitytehran/PersianMind-v1.0", "GeneralKnowledge": 30.61, "GSM8K": 2.3, "DC-Homograph": 41.67, "MC-Homograph": 65.9, "PiQA": 59.76, "Proverb-Quiz": 34.32, "VerbEval": 26.26, "Winogrande": 52.17, "Arc-Challenge": 54.59, "Arc-Easy": 69.73, "Feqh": 26.29, "Hallucination (Truthfulness)": 2.37, "P-Hellaswag": 63.78, "Law": 27.33, "AUT Multiple Choice": 36.1, "Parsi Literature": 27.8, "BoolQA": 66.3, "Reading Comprehension":
+{"Model": "PersianMind-v1.0", "model_name_for_query": "universitytehran/PersianMind-v1.0", "GeneralKnowledge": 30.61, "GSM8K": 2.3, "DC-Homograph": 41.67, "MC-Homograph": 65.9, "PiQA": 59.76, "Proverb-Quiz": 34.32, "VerbEval": 26.26, "Winogrande": 52.17, "Arc-Challenge": 54.59, "Arc-Easy": 69.73, "Feqh": 26.29, "Hallucination (Truthfulness)": 2.37, "P-Hellaswag": 63.78, "Law": 27.33, "AUT Multiple Choice": 36.1, "Parsi Literature": 27.8, "BoolQA": 66.3, "Reading Comprehension": 0.0, "PartExpert": 29.75, "MMLU Pro": 14.5, "Iranian Social Norms": 48.41, "#Params (B)": 0.0, "Precision": "F32", "Model sha": "af603eeb074138e2a613fbc95d89f018afbd3041", "Hub License": "cc-by-nc-sa-4.0"}
{"Model": "gemma-3-1b-it", "model_name_for_query": "google/gemma-3-1b-it", "GeneralKnowledge": 26.02, "GSM8K": 4.3, "DC-Homograph": 49.07, "MC-Homograph": 51.15, "PiQA": 57.66, "Proverb-Quiz": 28.92, "VerbEval": 27.67, "Winogrande": 50.58, "Arc-Challenge": 36.43, "Arc-Easy": 46.1, "Feqh": 28.0, "Hallucination (Truthfulness)": 54.94, "P-Hellaswag": 63.92, "Law": 20.33, "AUT Multiple Choice": 29.1, "Parsi Literature": 24.97, "BoolQA": 63.9, "Reading Comprehension": 2.1, "PartExpert": 27.22, "MMLU Pro": 13.7, "Iranian Social Norms": 51.22, "#Params (B)": 0.99, "Precision": "BF16", "Model sha": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", "Hub License": "gemma"}
{"Model": "Llama-3.2-1B-Instruct", "model_name_for_query": "meta-llama/Llama-3.2-1B-Instruct", "GeneralKnowledge": 29.59, "GSM8K": 4.1, "DC-Homograph": 50.93, "MC-Homograph": 52.53, "PiQA": 54.05, "Proverb-Quiz": 28.65, "VerbEval": 26.11, "Winogrande": 49.07, "Arc-Challenge": 37.5, "Arc-Easy": 47.38, "Feqh": 31.43, "Hallucination (Truthfulness)": 3.34, "P-Hellaswag": 55.4, "Law": 24.0, "AUT Multiple Choice": 29.9, "Parsi Literature": 27.03, "BoolQA": 64.1, "Reading Comprehension": 7.2, "PartExpert": 28.59, "MMLU Pro": 15.7, "Iranian Social Norms": 37.44, "#Params (B)": 1.23, "Precision": "BF16", "Model sha": "9213176726f574b556790deb65791e0c5aa438b6", "Hub License": "llama3.2"}
{"Model": "Maral-7B-alpha-1", "model_name_for_query": "MaralGPT/Maral-7B-alpha-1", "GeneralKnowledge": 31.63, "GSM8K": 6.1, "DC-Homograph": 43.52, "MC-Homograph": 47.47, "PiQA": 51.95, "Proverb-Quiz": 22.16, "VerbEval": 28.96, "Winogrande": 49.42, "Arc-Challenge": 37.29, "Arc-Easy": 43.1, "Feqh": 26.29, "Hallucination (Truthfulness)": 0.0, "P-Hellaswag": 60.18, "Law": 26.33, "AUT Multiple Choice": 28.4, "Parsi Literature": 26.77, "BoolQA": 62.7, "Reading Comprehension": 10.8, "PartExpert": 27.1, "MMLU Pro": 14.8, "Iranian Social Norms": 24.63, "#Params (B)": 7.24, "Precision": "BF16", "Model sha": "2ab5ca2a0d1a4454a78b4ca911e595bb9da2fe2f", "Hub License": "mit"}
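For context, each record in leaderboard_data.jsonl is a standalone JSON object, one model per line, holding per-benchmark scores plus metadata such as "#Params (B)", "Precision", "Model sha", and "Hub License". Below is a minimal sketch of how a file in this format could be read and ranked; the file path and the choice of ranking column are illustrative assumptions, not part of this commit.

import json

# Read the leaderboard: one JSON object per line, one row per model.
rows = []
with open("leaderboard_data.jsonl", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if line:  # tolerate blank lines between records
            rows.append(json.loads(line))

# Illustrative query: rank models by their GeneralKnowledge score.
ranked = sorted(rows, key=lambda r: r.get("GeneralKnowledge", 0.0), reverse=True)
for r in ranked:
    print(f'{r["Model"]}: GeneralKnowledge={r["GeneralKnowledge"]}, #Params (B)={r["#Params (B)"]}')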