mohalisad committed on
Commit 22b121f · verified · 1 Parent(s): d6def1f

Update leaderboard_data.jsonl

Files changed (1)
  1. leaderboard_data.jsonl +40 -40
leaderboard_data.jsonl CHANGED
@@ -1,40 +1,40 @@
1
- {"Model": "gpt-4o-2024-08-06", "model_name_for_query": null, "GeneralKnowledge": "90.82", "GSM8K": "73.10", "DC-Homograph": "87.04", "MC-Homograph": "95.62", "PiQA": "95.10", "Proverb-Quiz": "96.76", "VerbEval": "85.89", "Winogrande": "86.18", "Arc-Challenge": "95.09", "Arc-Easy": "97.22", "Feqh": "46.86", "Hallucination (Truthfulness)": "74.64", "P-Hellaswag": "85.53", "Law": "47.67", "AUT Multiple Choice": "67.70", "Parsi Literature": "45.95", "BoolQA": "94.10", "Reading Comprehension": "10.00", "PartExpert": "57.36", "MMLU Pro": "47.10", "Iranian Social Norms": "76.89", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
2
- {"Model": "gpt-4.1-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": "90.82", "GSM8K": "25.30", "DC-Homograph": "89.81", "MC-Homograph": "95.39", "PiQA": "95.90", "Proverb-Quiz": "95.14", "VerbEval": "83.04", "Winogrande": "85.92", "Arc-Challenge": "95.30", "Arc-Easy": "96.68", "Feqh": "52.00", "Hallucination (Truthfulness)": "77.43", "P-Hellaswag": "85.67", "Law": "53.67", "AUT Multiple Choice": "66.60", "Parsi Literature": "45.82", "BoolQA": "94.70", "Reading Comprehension": "3.60", "PartExpert": "59.92", "MMLU Pro": "50.50", "Iranian Social Norms": "77.56", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
3
- {"Model": "google__gemini-2.0-flash-001", "model_name_for_query": null, "GeneralKnowledge": "87.76", "GSM8K": "53.70", "DC-Homograph": "79.63", "MC-Homograph": "91.71", "PiQA": "90.59", "Proverb-Quiz": "95.14", "VerbEval": "85.15", "Winogrande": "78.74", "Arc-Challenge": "91.35", "Arc-Easy": "97.22", "Feqh": "53.14", "Hallucination (Truthfulness)": "68.87", "P-Hellaswag": "82.95", "Law": "45.67", "AUT Multiple Choice": "60.90", "Parsi Literature": "44.02", "BoolQA": "91.30", "Reading Comprehension": "23.90", "PartExpert": "59.50", "MMLU Pro": "47.80", "Iranian Social Norms": "77.68", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
4
- {"Model": "deepseek-v3-03-24", "model_name_for_query": null, "GeneralKnowledge": "85.71", "GSM8K": "53.10", "DC-Homograph": "83.33", "MC-Homograph": "94.24", "PiQA": "91.39", "Proverb-Quiz": "84.86", "VerbEval": "81.11", "Winogrande": "76.71", "Arc-Challenge": "92.31", "Arc-Easy": "96.58", "Feqh": "42.29", "Hallucination (Truthfulness)": "55.54", "P-Hellaswag": "85.30", "Law": "46.00", "AUT Multiple Choice": "65.60", "Parsi Literature": "44.66", "BoolQA": "95.30", "Reading Comprehension": "20.70", "PartExpert": "58.46", "MMLU Pro": "53.40", "Iranian Social Norms": "71.71", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
5
- {"Model": "gpt-4.1-mini-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": "79.34", "GSM8K": "60.30", "DC-Homograph": "66.67", "MC-Homograph": "94.24", "PiQA": "92.69", "Proverb-Quiz": "82.97", "VerbEval": "77.99", "Winogrande": "80.07", "Arc-Challenge": "91.88", "Arc-Easy": "96.15", "Feqh": "37.71", "Hallucination (Truthfulness)": "66.55", "P-Hellaswag": "84.57", "Law": "44.33", "AUT Multiple Choice": "53.50", "Parsi Literature": "41.18", "BoolQA": "93.70", "Reading Comprehension": "7.70", "PartExpert": "54.37", "MMLU Pro": "47.80", "Iranian Social Norms": "73.35", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
6
- {"Model": "gpt-4o-mini-2024-07-18", "model_name_for_query": null, "GeneralKnowledge": "79.08", "GSM8K": "60.90", "DC-Homograph": "68.52", "MC-Homograph": "90.09", "PiQA": "90.89", "Proverb-Quiz": "84.05", "VerbEval": "74.23", "Winogrande": "75.73", "Arc-Challenge": "86.43", "Arc-Easy": "94.01", "Feqh": "41.71", "Hallucination (Truthfulness)": "82.04", "P-Hellaswag": "83.84", "Law": "34.00", "AUT Multiple Choice": "54.80", "Parsi Literature": "40.93", "BoolQA": "93.30", "Reading Comprehension": "20.50", "PartExpert": "42.54", "MMLU Pro": "34.80", "Iranian Social Norms": "71.59", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
7
- {"Model": "google__gemini-2.0-flash-lite-001", "model_name_for_query": null, "GeneralKnowledge": "84.18", "GSM8K": "39.70", "DC-Homograph": "60.19", "MC-Homograph": "87.79", "PiQA": "85.29", "Proverb-Quiz": "91.35", "VerbEval": "81.39", "Winogrande": "75.64", "Arc-Challenge": "89.64", "Arc-Easy": "93.48", "Feqh": "41.71", "Hallucination (Truthfulness)": "67.32", "P-Hellaswag": "83.54", "Law": "43.00", "AUT Multiple Choice": "58.50", "Parsi Literature": "43.89", "BoolQA": "92.60", "Reading Comprehension": "26.70", "PartExpert": "54.15", "MMLU Pro": "41.20", "Iranian Social Norms": "70.49", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
8
- {"Model": "Qwen2.5-32B-Instruct", "model_name_for_query": "Qwen/Qwen2.5-32B-Instruct", "GeneralKnowledge": "61.73", "GSM8K": "50.10", "DC-Homograph": "67.59", "MC-Homograph": "91.47", "PiQA": "83.98", "Proverb-Quiz": "63.24", "VerbEval": "54.58", "Winogrande": "80.07", "Arc-Challenge": "85.15", "Arc-Easy": "91.87", "Feqh": "38.86", "Hallucination (Truthfulness)": "59.22", "P-Hellaswag": "82.07", "Law": "42.33", "AUT Multiple Choice": "50.40", "Parsi Literature": "40.41", "BoolQA": "93.40", "Reading Comprehension": "7.40", "PartExpert": "46.78", "MMLU Pro": "37.40", "Iranian Social Norms": "70.00", "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", "Hub License": "apache-2.0"}
9
- {"Model": "gemma-3-27b-it", "model_name_for_query": "google/gemma-3-27b-it", "GeneralKnowledge": "73.72", "GSM8K": "28.30", "DC-Homograph": "63.89", "MC-Homograph": "92.40", "PiQA": "87.29", "Proverb-Quiz": "78.92", "VerbEval": "66.02", "Winogrande": "78.12", "Arc-Challenge": "88.35", "Arc-Easy": "94.22", "Feqh": "24.57", "Hallucination (Truthfulness)": "60.15", "P-Hellaswag": "83.39", "Law": "36.33", "AUT Multiple Choice": "55.20", "Parsi Literature": "40.93", "BoolQA": "91.40", "Reading Comprehension": "1.20", "PartExpert": "49.32", "MMLU Pro": "36.60", "Iranian Social Norms": "70.49", "#Params (B)": 27.43, "Precision": "BF16", "Model sha": "005ad3404e59d6023443cb575daa05336842228a", "Hub License": "gemma"}
10
- {"Model": "Qwen3-32B", "model_name_for_query": "Qwen/Qwen3-32B", "GeneralKnowledge": "67.60", "GSM8K": "37.90", "DC-Homograph": "51.85", "MC-Homograph": "89.63", "PiQA": "87.69", "Proverb-Quiz": "64.59", "VerbEval": "56.35", "Winogrande": "71.48", "Arc-Challenge": "91.13", "Arc-Easy": "94.22", "Feqh": "29.71", "Hallucination (Truthfulness)": "47.50", "P-Hellaswag": "83.47", "Law": "37.00", "AUT Multiple Choice": "48.30", "Parsi Literature": "39.12", "BoolQA": "91.10", "Reading Comprehension": "22.00", "PartExpert": "50.06", "MMLU Pro": "42.80", "Iranian Social Norms": "73.48", "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "d47b0d4ae4b48fde975756bf360a63a9cca8d470", "Hub License": "apache-2.0"}
11
- {"Model": "QwQ-32B-Preview", "model_name_for_query": "Qwen/QwQ-32B-Preview", "GeneralKnowledge": "63.27", "GSM8K": "34.70", "DC-Homograph": "61.11", "MC-Homograph": "88.25", "PiQA": "81.28", "Proverb-Quiz": "58.11", "VerbEval": "51.97", "Winogrande": "75.64", "Arc-Challenge": "85.58", "Arc-Easy": "91.44", "Feqh": "41.14", "Hallucination (Truthfulness)": "38.84", "P-Hellaswag": "84.13", "Law": "43.00", "AUT Multiple Choice": "50.60", "Parsi Literature": "39.77", "BoolQA": "88.50", "Reading Comprehension": "23.60", "PartExpert": "47.39", "MMLU Pro": "37.30", "Iranian Social Norms": "72.26", "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "91906fe41a48b6a89ce2970abfd1269eefee170e", "Hub License": "apache-2.0"}
12
- {"Model": "gemma-3-12b-it", "model_name_for_query": "google/gemma-3-12b-it", "GeneralKnowledge": "68.37", "GSM8K": "20.20", "DC-Homograph": "67.59", "MC-Homograph": "91.24", "PiQA": "87.19", "Proverb-Quiz": "72.97", "VerbEval": "63.39", "Winogrande": "73.96", "Arc-Challenge": "83.33", "Arc-Easy": "93.26", "Feqh": "25.14", "Hallucination (Truthfulness)": "46.10", "P-Hellaswag": "83.17", "Law": "36.33", "AUT Multiple Choice": "49.00", "Parsi Literature": "40.03", "BoolQA": "87.60", "Reading Comprehension": "4.50", "PartExpert": "44.12", "MMLU Pro": "32.60", "Iranian Social Norms": "75.55", "#Params (B)": 12.18, "Precision": "BF16", "Model sha": "96b6f1eccf38110c56df3a15bffe176da04bfd80", "Hub License": "gemma"}
13
- {"Model": "gemma-2-27b-it", "model_name_for_query": "google/gemma-2-27b-it", "GeneralKnowledge": "68.11", "GSM8K": "26.70", "DC-Homograph": "60.19", "MC-Homograph": "91.24", "PiQA": "89.69", "Proverb-Quiz": "73.51", "VerbEval": "61.16", "Winogrande": "76.44", "Arc-Challenge": "86.75", "Arc-Easy": "94.22", "Feqh": "24.00", "Hallucination (Truthfulness)": "13.05", "P-Hellaswag": "83.69", "Law": "34.67", "AUT Multiple Choice": "50.80", "Parsi Literature": "35.91", "BoolQA": "89.80", "Reading Comprehension": "0.10", "PartExpert": "46.60", "MMLU Pro": "36.90", "Iranian Social Norms": "77.38", "#Params (B)": 27.22, "Precision": "BF16", "Model sha": "aaf20e6b9f4c0fcf043f6fb2a2068419086d77b0", "Hub License": "gemma"}
14
- {"Model": "aya-expanse-32b", "model_name_for_query": "CohereLabs/aya-expanse-32b", "GeneralKnowledge": "73.72", "GSM8K": "17.50", "DC-Homograph": "62.96", "MC-Homograph": "87.56", "PiQA": "91.19", "Proverb-Quiz": "77.03", "VerbEval": "61.95", "Winogrande": "70.50", "Arc-Challenge": "85.15", "Arc-Easy": "93.37", "Feqh": "37.14", "Hallucination (Truthfulness)": "44.84", "P-Hellaswag": "81.70", "Law": "38.67", "AUT Multiple Choice": "54.70", "Parsi Literature": "34.75", "BoolQA": "89.70", "Reading Comprehension": "24.90", "PartExpert": "44.29", "MMLU Pro": "32.10", "Iranian Social Norms": "74.94", "#Params (B)": 32.29, "Precision": "F16", "Model sha": "94bda1dcb97d260f732d230b832c7c685ae91e23", "Hub License": "cc-by-nc-4.0"}
15
- {"Model": "QwQ-32B", "model_name_for_query": "Qwen/QwQ-32B", "GeneralKnowledge": "60.71", "GSM8K": "29.30", "DC-Homograph": "58.33", "MC-Homograph": "88.25", "PiQA": "81.68", "Proverb-Quiz": "59.19", "VerbEval": "52.31", "Winogrande": "73.07", "Arc-Challenge": "84.94", "Arc-Easy": "90.80", "Feqh": "41.71", "Hallucination (Truthfulness)": "48.93", "P-Hellaswag": "82.22", "Law": "38.00", "AUT Multiple Choice": "49.30", "Parsi Literature": "37.71", "BoolQA": "88.50", "Reading Comprehension": "17.80", "PartExpert": "46.75", "MMLU Pro": "39.00", "Iranian Social Norms": "70.73", "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "976055f8c83f394f35dbd3ab09a285a984907bd0", "Hub License": "apache-2.0"}
16
- {"Model": "gpt-4.1-nano-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": "68.11", "GSM8K": "58.40", "DC-Homograph": "49.07", "MC-Homograph": "78.11", "PiQA": "84.58", "Proverb-Quiz": "67.84", "VerbEval": "66.21", "Winogrande": "60.32", "Arc-Challenge": "81.41", "Arc-Easy": "91.55", "Feqh": "32.00", "Hallucination (Truthfulness)": "51.24", "P-Hellaswag": "77.96", "Law": "32.67", "AUT Multiple Choice": "46.10", "Parsi Literature": "36.42", "BoolQA": "81.70", "Reading Comprehension": "6.30", "PartExpert": "42.49", "MMLU Pro": "29.90", "Iranian Social Norms": "74.76", "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
17
- {"Model": "Qwen3-14B", "model_name_for_query": "Qwen/Qwen3-14B", "GeneralKnowledge": "56.38", "GSM8K": "31.10", "DC-Homograph": "55.56", "MC-Homograph": "87.56", "PiQA": "77.18", "Proverb-Quiz": "53.78", "VerbEval": "54.36", "Winogrande": "67.32", "Arc-Challenge": "84.29", "Arc-Easy": "91.02", "Feqh": "29.14", "Hallucination (Truthfulness)": "44.54", "P-Hellaswag": "80.97", "Law": "34.67", "AUT Multiple Choice": "44.80", "Parsi Literature": "35.39", "BoolQA": "87.60", "Reading Comprehension": "24.40", "PartExpert": "43.22", "MMLU Pro": "35.50", "Iranian Social Norms": "74.51", "#Params (B)": 14.76, "Precision": "BF16", "Model sha": "8268fe3026cb304910457689366670e803a6fd56", "Hub License": "apache-2.0"}
18
- {"Model": "gemma-2-9b-it", "model_name_for_query": "google/gemma-2-9b-it", "GeneralKnowledge": "64.03", "GSM8K": "17.40", "DC-Homograph": "59.26", "MC-Homograph": "90.55", "PiQA": "87.09", "Proverb-Quiz": "69.19", "VerbEval": "58.25", "Winogrande": "72.01", "Arc-Challenge": "84.29", "Arc-Easy": "93.16", "Feqh": "29.71", "Hallucination (Truthfulness)": "50.58", "P-Hellaswag": "80.82", "Law": "33.67", "AUT Multiple Choice": "48.50", "Parsi Literature": "38.10", "BoolQA": "89.70", "Reading Comprehension": "0.10", "PartExpert": "43.03", "MMLU Pro": "33.20", "Iranian Social Norms": "73.84", "#Params (B)": 9.24, "Precision": "BF16", "Model sha": "11c9b309abf73637e4b6f9a3fa1e92e615547819", "Hub License": "gemma"}
19
- {"Model": "Qwen3-30B-A3B", "model_name_for_query": "Qwen/Qwen3-30B-A3B", "GeneralKnowledge": "65.05", "GSM8K": "28.80", "DC-Homograph": "57.41", "MC-Homograph": "86.41", "PiQA": "72.47", "Proverb-Quiz": "50.81", "VerbEval": "48.09", "Winogrande": "65.28", "Arc-Challenge": "87.39", "Arc-Easy": "93.58", "Feqh": "23.43", "Hallucination (Truthfulness)": "3.54", "P-Hellaswag": "83.10", "Law": "35.33", "AUT Multiple Choice": "48.00", "Parsi Literature": "36.55", "BoolQA": "86.20", "Reading Comprehension": "26.40", "PartExpert": "41.13", "MMLU Pro": "36.30", "Iranian Social Norms": "44.21", "#Params (B)": 30.53, "Precision": "BF16", "Model sha": "ae659febe817e4b3ebd7355f47792725801204c9", "Hub License": "apache-2.0"}
20
- {"Model": "aya-23-35B", "model_name_for_query": "CohereLabs/aya-23-35B", "GeneralKnowledge": "63.27", "GSM8K": "10.00", "DC-Homograph": "55.56", "MC-Homograph": "83.64", "PiQA": "89.49", "Proverb-Quiz": "67.03", "VerbEval": "47.32", "Winogrande": "65.81", "Arc-Challenge": "77.56", "Arc-Easy": "90.16", "Feqh": "30.29", "Hallucination (Truthfulness)": "11.72", "P-Hellaswag": "79.87", "Law": "32.00", "AUT Multiple Choice": "48.70", "Parsi Literature": "31.92", "BoolQA": "86.20", "Reading Comprehension": "23.70", "PartExpert": "37.44", "MMLU Pro": "24.10", "Iranian Social Norms": "65.00", "#Params (B)": 34.98, "Precision": "F16", "Model sha": "5e72bd5ad83e5e1612ee7f56a0c1a439a7cfb887", "Hub License": "cc-by-nc-4.0"}
21
- {"Model": "Qwen3-8B", "model_name_for_query": "Qwen/Qwen3-8B", "GeneralKnowledge": "49.23", "GSM8K": "25.70", "DC-Homograph": "50.93", "MC-Homograph": "82.95", "PiQA": "75.98", "Proverb-Quiz": "51.89", "VerbEval": "47.93", "Winogrande": "61.91", "Arc-Challenge": "80.24", "Arc-Easy": "87.38", "Feqh": "28.00", "Hallucination (Truthfulness)": "38.46", "P-Hellaswag": "80.38", "Law": "29.67", "AUT Multiple Choice": "46.00", "Parsi Literature": "33.20", "BoolQA": "86.40", "Reading Comprehension": "25.00", "PartExpert": "38.31", "MMLU Pro": "31.10", "Iranian Social Norms": "63.41", "#Params (B)": 8.19, "Precision": "BF16", "Model sha": "9c925d64d72725edaf899c6cb9c377fd0709d9c5", "Hub License": "apache-2.0"}
22
- {"Model": "aya-expanse-8b", "model_name_for_query": "CohereLabs/aya-expanse-8b", "GeneralKnowledge": "58.67", "GSM8K": "9.80", "DC-Homograph": "51.85", "MC-Homograph": "80.65", "PiQA": "80.18", "Proverb-Quiz": "60.00", "VerbEval": "48.06", "Winogrande": "64.04", "Arc-Challenge": "71.47", "Arc-Easy": "84.60", "Feqh": "29.71", "Hallucination (Truthfulness)": "23.52", "P-Hellaswag": "76.49", "Law": "32.33", "AUT Multiple Choice": "45.80", "Parsi Literature": "34.49", "BoolQA": "82.30", "Reading Comprehension": "20.10", "PartExpert": "35.56", "MMLU Pro": "21.90", "Iranian Social Norms": "71.71", "#Params (B)": 8.02, "Precision": "F16", "Model sha": "0ad43ec1e309e1351faa4b1d22713c065e37359a", "Hub License": "cc-by-nc-4.0"}
23
- {"Model": "Hormoz-8B", "model_name_for_query": "mann-e/Hormoz-8B", "GeneralKnowledge": "58.42", "GSM8K": "10.00", "DC-Homograph": "50.93", "MC-Homograph": "80.65", "PiQA": "80.68", "Proverb-Quiz": "60.27", "VerbEval": "47.29", "Winogrande": "64.39", "Arc-Challenge": "70.41", "Arc-Easy": "84.28", "Feqh": "28.57", "Hallucination (Truthfulness)": "23.66", "P-Hellaswag": "76.05", "Law": "30.33", "AUT Multiple Choice": "46.70", "Parsi Literature": "33.08", "BoolQA": "79.80", "Reading Comprehension": "19.60", "PartExpert": "35.68", "MMLU Pro": "21.50", "Iranian Social Norms": "70.30", "#Params (B)": 8.02, "Precision": "F32", "Model sha": "c91bcecb236c90523f70db7efa23dd794e9b4cff", "Hub License": "mit"}
24
- {"Model": "Llama-3.1-8B-Instruct", "model_name_for_query": "meta-llama/Llama-3.1-8B-Instruct", "GeneralKnowledge": "52.55", "GSM8K": "12.00", "DC-Homograph": "43.52", "MC-Homograph": "79.03", "PiQA": "70.07", "Proverb-Quiz": "47.57", "VerbEval": "42.91", "Winogrande": "54.21", "Arc-Challenge": "68.91", "Arc-Easy": "80.11", "Feqh": "29.71", "Hallucination (Truthfulness)": "6.76", "P-Hellaswag": "79.79", "Law": "32.67", "AUT Multiple Choice": "44.90", "Parsi Literature": "32.30", "BoolQA": "82.70", "Reading Comprehension": "24.50", "PartExpert": "37.62", "MMLU Pro": "25.70", "Iranian Social Norms": "70.98", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "0e9e39f249a16976918f6564b8830bc894c89659", "Hub License": "llama3.1"}
25
- {"Model": "Qwen2.5-7B-Instruct", "model_name_for_query": "Qwen/Qwen2.5-7B-Instruct", "GeneralKnowledge": "51.02", "GSM8K": "18.00", "DC-Homograph": "52.78", "MC-Homograph": "79.26", "PiQA": "71.07", "Proverb-Quiz": "47.84", "VerbEval": "44.44", "Winogrande": "61.91", "Arc-Challenge": "72.33", "Arc-Easy": "81.50", "Feqh": "36.57", "Hallucination (Truthfulness)": "34.89", "P-Hellaswag": "74.80", "Law": "32.33", "AUT Multiple Choice": "42.60", "Parsi Literature": "31.27", "BoolQA": "82.50", "Reading Comprehension": "17.60", "PartExpert": "37.24", "MMLU Pro": "26.70", "Iranian Social Norms": "64.51", "#Params (B)": 7.61, "Precision": "BF16", "Model sha": "a09a35458c702b33eeacc393d103063234e8bc28", "Hub License": "apache-2.0"}
26
- {"Model": "aya-23-8B", "model_name_for_query": "CohereLabs/aya-23-8B", "GeneralKnowledge": "52.30", "GSM8K": "6.10", "DC-Homograph": "52.78", "MC-Homograph": "76.27", "PiQA": "80.78", "Proverb-Quiz": "44.32", "VerbEval": "39.30", "Winogrande": "57.13", "Arc-Challenge": "63.68", "Arc-Easy": "81.39", "Feqh": "29.14", "Hallucination (Truthfulness)": "0.60", "P-Hellaswag": "75.83", "Law": "28.33", "AUT Multiple Choice": "42.90", "Parsi Literature": "31.27", "BoolQA": "72.30", "Reading Comprehension": "23.40", "PartExpert": "33.33", "MMLU Pro": "19.90", "Iranian Social Norms": "70.73", "#Params (B)": 8.02, "Precision": "F16", "Model sha": "2a1a63b24af8f591616fdf58936ee576d63ca835", "Hub License": "cc-by-nc-4.0"}
27
- {"Model": "Qwen2-7B-Instruct", "model_name_for_query": "Qwen/Qwen2-7B-Instruct", "GeneralKnowledge": "52.04", "GSM8K": "14.50", "DC-Homograph": "54.63", "MC-Homograph": "72.81", "PiQA": "70.97", "Proverb-Quiz": "50.54", "VerbEval": "40.62", "Winogrande": "60.94", "Arc-Challenge": "69.12", "Arc-Easy": "80.75", "Feqh": "28.00", "Hallucination (Truthfulness)": "25.93", "P-Hellaswag": "76.71", "Law": "28.33", "AUT Multiple Choice": "40.40", "Parsi Literature": "31.40", "BoolQA": "79.00", "Reading Comprehension": "10.90", "PartExpert": "36.31", "MMLU Pro": "23.80", "Iranian Social Norms": "62.20", "#Params (B)": 7.61, "Precision": "BF16", "Model sha": "f2826a00ceef68f0f2b946d945ecc0477ce4450c", "Hub License": "apache-2.0"}
28
- {"Model": "Meta-Llama-3-8B-Instruct", "model_name_for_query": "meta-llama/Meta-Llama-3-8B-Instruct", "GeneralKnowledge": "52.04", "GSM8K": "10.40", "DC-Homograph": "41.67", "MC-Homograph": "81.11", "PiQA": "70.97", "Proverb-Quiz": "42.97", "VerbEval": "38.93", "Winogrande": "56.95", "Arc-Challenge": "66.77", "Arc-Easy": "76.47", "Feqh": "33.71", "Hallucination (Truthfulness)": "33.23", "P-Hellaswag": "76.71", "Law": "32.00", "AUT Multiple Choice": "45.00", "Parsi Literature": "29.99", "BoolQA": "82.50", "Reading Comprehension": "19.40", "PartExpert": "36.30", "MMLU Pro": "26.00", "Iranian Social Norms": "70.06", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "5f0b02c75b57c5855da9ae460ce51323ea669d8a", "Hub License": "llama3"}
29
- {"Model": "gemma-3-4b-it", "model_name_for_query": "google/gemma-3-4b-it", "GeneralKnowledge": "45.92", "GSM8K": "9.60", "DC-Homograph": "42.59", "MC-Homograph": "72.58", "PiQA": "72.77", "Proverb-Quiz": "53.78", "VerbEval": "45.30", "Winogrande": "55.09", "Arc-Challenge": "63.46", "Arc-Easy": "79.57", "Feqh": "21.14", "Hallucination (Truthfulness)": "46.04", "P-Hellaswag": "73.84", "Law": "27.67", "AUT Multiple Choice": "42.50", "Parsi Literature": "30.24", "BoolQA": "78.60", "Reading Comprehension": "5.50", "PartExpert": "34.70", "MMLU Pro": "22.80", "Iranian Social Norms": "65.55", "#Params (B)": 4.3, "Precision": "BF16", "Model sha": "093f9f388b31de276ce2de164bdc2081324b9767", "Hub License": "gemma"}
30
- {"Model": "Qwen3-4B", "model_name_for_query": "Qwen/Qwen3-4B", "GeneralKnowledge": "43.88", "GSM8K": "20.10", "DC-Homograph": "38.89", "MC-Homograph": "76.27", "PiQA": "66.07", "Proverb-Quiz": "45.41", "VerbEval": "41.23", "Winogrande": "54.56", "Arc-Challenge": "73.61", "Arc-Easy": "83.42", "Feqh": "30.29", "Hallucination (Truthfulness)": "25.29", "P-Hellaswag": "78.03", "Law": "30.33", "AUT Multiple Choice": "40.60", "Parsi Literature": "31.79", "BoolQA": "81.90", "Reading Comprehension": "21.30", "PartExpert": "37.28", "MMLU Pro": "28.90", "Iranian Social Norms": "68.72", "#Params (B)": 4.02, "Precision": "BF16", "Model sha": "531c80e289d6cff3a7cd8c0db8110231d23a6f7a", "Hub License": "apache-2.0"}
31
- {"Model": "Hermes-3-Llama-3.1-8B", "model_name_for_query": "NousResearch/Hermes-3-Llama-3.1-8B", "GeneralKnowledge": "49.49", "GSM8K": "10.20", "DC-Homograph": "44.44", "MC-Homograph": "79.72", "PiQA": "70.37", "Proverb-Quiz": "47.84", "VerbEval": "48.94", "Winogrande": "55.18", "Arc-Challenge": "65.28", "Arc-Easy": "78.07", "Feqh": "30.29", "Hallucination (Truthfulness)": "45.20", "P-Hellaswag": "73.99", "Law": "31.67", "AUT Multiple Choice": "42.10", "Parsi Literature": "30.63", "BoolQA": "83.50", "Reading Comprehension": "13.50", "PartExpert": "35.61", "MMLU Pro": "24.10", "Iranian Social Norms": "54.88", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "896ea440e5a9e6070e3d8a2774daf2b481ab425b", "Hub License": "llama3"}
32
- {"Model": "Dorna2-Llama3.1-8B-Instruct", "model_name_for_query": "PartAI/Dorna2-Llama3.1-8B-Instruct", "GeneralKnowledge": "48.72", "GSM8K": "11.90", "DC-Homograph": "44.44", "MC-Homograph": "72.81", "PiQA": "69.97", "Proverb-Quiz": "42.97", "VerbEval": "42.06", "Winogrande": "54.47", "Arc-Challenge": "67.63", "Arc-Easy": "78.72", "Feqh": "33.71", "Hallucination (Truthfulness)": "33.91", "P-Hellaswag": "78.91", "Law": "29.67", "AUT Multiple Choice": "41.00", "Parsi Literature": "27.28", "BoolQA": "81.80", "Reading Comprehension": "21.90", "PartExpert": "35.65", "MMLU Pro": "22.70", "Iranian Social Norms": "49.82", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "b78e4bd261100c96e511ed5090ca0ce0e1f4b340", "Hub License": "llama3.1"}
33
- {"Model": "Llama-3.1-8B", "model_name_for_query": "meta-llama/Llama-3.1-8B", "GeneralKnowledge": "49.23", "GSM8K": "10.80", "DC-Homograph": "46.30", "MC-Homograph": "72.12", "PiQA": "66.47", "Proverb-Quiz": "35.95", "VerbEval": "39.91", "Winogrande": "54.92", "Arc-Challenge": "63.35", "Arc-Easy": "75.08", "Feqh": "30.86", "Hallucination (Truthfulness)": "45.02", "P-Hellaswag": "76.34", "Law": "33.00", "AUT Multiple Choice": "42.60", "Parsi Literature": "27.41", "BoolQA": "71.60", "Reading Comprehension": "19.70", "PartExpert": "35.92", "MMLU Pro": "22.20", "Iranian Social Norms": "61.83", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", "Hub License": "llama3.1"}
34
- {"Model": "Meta-Llama-3-8B", "model_name_for_query": "meta-llama/Meta-Llama-3-8B", "GeneralKnowledge": "47.70", "GSM8K": "10.30", "DC-Homograph": "41.67", "MC-Homograph": "74.42", "PiQA": "64.16", "Proverb-Quiz": "37.30", "VerbEval": "39.46", "Winogrande": "55.36", "Arc-Challenge": "62.07", "Arc-Easy": "75.83", "Feqh": "27.43", "Hallucination (Truthfulness)": "37.00", "P-Hellaswag": "76.49", "Law": "35.67", "AUT Multiple Choice": "42.50", "Parsi Literature": "28.19", "BoolQA": "75.20", "Reading Comprehension": "19.50", "PartExpert": "35.10", "MMLU Pro": "22.80", "Iranian Social Norms": "54.02", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", "Hub License": "llama3"}
35
- {"Model": "Dorna-Llama3-8B-Instruct", "model_name_for_query": "PartAI/Dorna-Llama3-8B-Instruct", "GeneralKnowledge": "41.33", "GSM8K": "10.30", "DC-Homograph": "40.74", "MC-Homograph": "74.65", "PiQA": "66.17", "Proverb-Quiz": "35.41", "VerbEval": "34.74", "Winogrande": "56.16", "Arc-Challenge": "59.94", "Arc-Easy": "70.70", "Feqh": "29.14", "Hallucination (Truthfulness)": "31.49", "P-Hellaswag": "75.68", "Law": "25.33", "AUT Multiple Choice": "36.90", "Parsi Literature": "27.54", "BoolQA": "80.10", "Reading Comprehension": "21.80", "PartExpert": "34.49", "MMLU Pro": "22.00", "Iranian Social Norms": "69.39", "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "fb268bb51b950b4db5b7c82c1b73d9e803020eed", "Hub License": "llama3"}
36
- {"Model": "gemma-2-2b-it", "model_name_for_query": "google/gemma-2-2b-it", "GeneralKnowledge": "32.91", "GSM8K": "6.40", "DC-Homograph": "47.22", "MC-Homograph": "74.65", "PiQA": "66.87", "Proverb-Quiz": "45.68", "VerbEval": "36.18", "Winogrande": "54.74", "Arc-Challenge": "57.91", "Arc-Easy": "70.48", "Feqh": "25.71", "Hallucination (Truthfulness)": "39.02", "P-Hellaswag": "69.88", "Law": "32.67", "AUT Multiple Choice": "36.90", "Parsi Literature": "30.76", "BoolQA": "72.40", "Reading Comprehension": "0.30", "PartExpert": "31.31", "MMLU Pro": "18.20", "Iranian Social Norms": "40.18", "#Params (B)": 2.61, "Precision": "BF16", "Model sha": "299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", "Hub License": "gemma"}
37
- {"Model": "PersianMind-v1.0", "model_name_for_query": "universitytehran/PersianMind-v1.0", "GeneralKnowledge": "30.61", "GSM8K": "2.30", "DC-Homograph": "41.67", "MC-Homograph": "65.90", "PiQA": "59.76", "Proverb-Quiz": "34.32", "VerbEval": "26.26", "Winogrande": "52.17", "Arc-Challenge": "54.59", "Arc-Easy": "69.73", "Feqh": "26.29", "Hallucination (Truthfulness)": "2.37", "P-Hellaswag": "63.78", "Law": "27.33", "AUT Multiple Choice": "36.10", "Parsi Literature": "27.80", "BoolQA": "66.30", "Reading Comprehension": "-", "PartExpert": "29.75", "MMLU Pro": "14.50", "Iranian Social Norms": "48.41", "#Params (B)": 0.0, "Precision": "F32", "Model sha": "af603eeb074138e2a613fbc95d89f018afbd3041", "Hub License": "cc-by-nc-sa-4.0"}
38
- {"Model": "gemma-3-1b-it", "model_name_for_query": "google/gemma-3-1b-it", "GeneralKnowledge": "26.02", "GSM8K": "4.30", "DC-Homograph": "49.07", "MC-Homograph": "51.15", "PiQA": "57.66", "Proverb-Quiz": "28.92", "VerbEval": "27.67", "Winogrande": "50.58", "Arc-Challenge": "36.43", "Arc-Easy": "46.10", "Feqh": "28.00", "Hallucination (Truthfulness)": "54.94", "P-Hellaswag": "63.92", "Law": "20.33", "AUT Multiple Choice": "29.10", "Parsi Literature": "24.97", "BoolQA": "63.90", "Reading Comprehension": "2.10", "PartExpert": "27.22", "MMLU Pro": "13.70", "Iranian Social Norms": "51.22", "#Params (B)": 0.99, "Precision": "BF16", "Model sha": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", "Hub License": "gemma"}
39
- {"Model": "Llama-3.2-1B-Instruct", "model_name_for_query": "meta-llama/Llama-3.2-1B-Instruct", "GeneralKnowledge": "29.59", "GSM8K": "4.10", "DC-Homograph": "50.93", "MC-Homograph": "52.53", "PiQA": "54.05", "Proverb-Quiz": "28.65", "VerbEval": "26.11", "Winogrande": "49.07", "Arc-Challenge": "37.50", "Arc-Easy": "47.38", "Feqh": "31.43", "Hallucination (Truthfulness)": "3.34", "P-Hellaswag": "55.40", "Law": "24.00", "AUT Multiple Choice": "29.90", "Parsi Literature": "27.03", "BoolQA": "64.10", "Reading Comprehension": "7.20", "PartExpert": "28.59", "MMLU Pro": "15.70", "Iranian Social Norms": "37.44", "#Params (B)": 1.23, "Precision": "BF16", "Model sha": "9213176726f574b556790deb65791e0c5aa438b6", "Hub License": "llama3.2"}
40
- {"Model": "Maral-7B-alpha-1", "model_name_for_query": "MaralGPT/Maral-7B-alpha-1", "GeneralKnowledge": "31.63", "GSM8K": "6.10", "DC-Homograph": "43.52", "MC-Homograph": "47.47", "PiQA": "51.95", "Proverb-Quiz": "22.16", "VerbEval": "28.96", "Winogrande": "49.42", "Arc-Challenge": "37.29", "Arc-Easy": "43.10", "Feqh": "26.29", "Hallucination (Truthfulness)": "0.00", "P-Hellaswag": "60.18", "Law": "26.33", "AUT Multiple Choice": "28.40", "Parsi Literature": "26.77", "BoolQA": "62.70", "Reading Comprehension": "10.80", "PartExpert": "27.10", "MMLU Pro": "14.80", "Iranian Social Norms": "24.63", "#Params (B)": 7.24, "Precision": "BF16", "Model sha": "2ab5ca2a0d1a4454a78b4ca911e595bb9da2fe2f", "Hub License": "mit"}
 
1
+ {"Model": "gpt-4o-2024-08-06", "model_name_for_query": null, "GeneralKnowledge": 90.82, "GSM8K": 73.1, "DC-Homograph": 87.04, "MC-Homograph": 95.62, "PiQA": 95.1, "Proverb-Quiz": 96.76, "VerbEval": 85.89, "Winogrande": 86.18, "Arc-Challenge": 95.09, "Arc-Easy": 97.22, "Feqh": 46.86, "Hallucination (Truthfulness)": 74.64, "P-Hellaswag": 85.53, "Law": 47.67, "AUT Multiple Choice": 67.7, "Parsi Literature": 45.95, "BoolQA": 94.1, "Reading Comprehension": 10.0, "PartExpert": 57.36, "MMLU Pro": 47.1, "Iranian Social Norms": 76.89, "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
2
+ {"Model": "gpt-4.1-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": 90.82, "GSM8K": 25.3, "DC-Homograph": 89.81, "MC-Homograph": 95.39, "PiQA": 95.9, "Proverb-Quiz": 95.14, "VerbEval": 83.04, "Winogrande": 85.92, "Arc-Challenge": 95.3, "Arc-Easy": 96.68, "Feqh": 52.0, "Hallucination (Truthfulness)": 77.43, "P-Hellaswag": 85.67, "Law": 53.67, "AUT Multiple Choice": 66.6, "Parsi Literature": 45.82, "BoolQA": 94.7, "Reading Comprehension": 3.6, "PartExpert": 59.92, "MMLU Pro": 50.5, "Iranian Social Norms": 77.56, "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
3
+ {"Model": "google__gemini-2.0-flash-001", "model_name_for_query": null, "GeneralKnowledge": 87.76, "GSM8K": 53.7, "DC-Homograph": 79.63, "MC-Homograph": 91.71, "PiQA": 90.59, "Proverb-Quiz": 95.14, "VerbEval": 85.15, "Winogrande": 78.74, "Arc-Challenge": 91.35, "Arc-Easy": 97.22, "Feqh": 53.14, "Hallucination (Truthfulness)": 68.87, "P-Hellaswag": 82.95, "Law": 45.67, "AUT Multiple Choice": 60.9, "Parsi Literature": 44.02, "BoolQA": 91.3, "Reading Comprehension": 23.9, "PartExpert": 59.5, "MMLU Pro": 47.8, "Iranian Social Norms": 77.68, "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
4
+ {"Model": "deepseek-v3-03-24", "model_name_for_query": null, "GeneralKnowledge": 85.71, "GSM8K": 53.1, "DC-Homograph": 83.33, "MC-Homograph": 94.24, "PiQA": 91.39, "Proverb-Quiz": 84.86, "VerbEval": 81.11, "Winogrande": 76.71, "Arc-Challenge": 92.31, "Arc-Easy": 96.58, "Feqh": 42.29, "Hallucination (Truthfulness)": 55.54, "P-Hellaswag": 85.3, "Law": 46.0, "AUT Multiple Choice": 65.6, "Parsi Literature": 44.66, "BoolQA": 95.3, "Reading Comprehension": 20.7, "PartExpert": 58.46, "MMLU Pro": 53.4, "Iranian Social Norms": 71.71, "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
5
+ {"Model": "gpt-4.1-mini-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": 79.34, "GSM8K": 60.3, "DC-Homograph": 66.67, "MC-Homograph": 94.24, "PiQA": 92.69, "Proverb-Quiz": 82.97, "VerbEval": 77.99, "Winogrande": 80.07, "Arc-Challenge": 91.88, "Arc-Easy": 96.15, "Feqh": 37.71, "Hallucination (Truthfulness)": 66.55, "P-Hellaswag": 84.57, "Law": 44.33, "AUT Multiple Choice": 53.5, "Parsi Literature": 41.18, "BoolQA": 93.7, "Reading Comprehension": 7.7, "PartExpert": 54.37, "MMLU Pro": 47.8, "Iranian Social Norms": 73.35, "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
6
+ {"Model": "gpt-4o-mini-2024-07-18", "model_name_for_query": null, "GeneralKnowledge": 79.08, "GSM8K": 60.9, "DC-Homograph": 68.52, "MC-Homograph": 90.09, "PiQA": 90.89, "Proverb-Quiz": 84.05, "VerbEval": 74.23, "Winogrande": 75.73, "Arc-Challenge": 86.43, "Arc-Easy": 94.01, "Feqh": 41.71, "Hallucination (Truthfulness)": 82.04, "P-Hellaswag": 83.84, "Law": 34.0, "AUT Multiple Choice": 54.8, "Parsi Literature": 40.93, "BoolQA": 93.3, "Reading Comprehension": 20.5, "PartExpert": 42.54, "MMLU Pro": 34.8, "Iranian Social Norms": 71.59, "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
7
+ {"Model": "google__gemini-2.0-flash-lite-001", "model_name_for_query": null, "GeneralKnowledge": 84.18, "GSM8K": 39.7, "DC-Homograph": 60.19, "MC-Homograph": 87.79, "PiQA": 85.29, "Proverb-Quiz": 91.35, "VerbEval": 81.39, "Winogrande": 75.64, "Arc-Challenge": 89.64, "Arc-Easy": 93.48, "Feqh": 41.71, "Hallucination (Truthfulness)": 67.32, "P-Hellaswag": 83.54, "Law": 43.0, "AUT Multiple Choice": 58.5, "Parsi Literature": 43.89, "BoolQA": 92.6, "Reading Comprehension": 26.7, "PartExpert": 54.15, "MMLU Pro": 41.2, "Iranian Social Norms": 70.49, "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
8
+ {"Model": "Qwen2.5-32B-Instruct", "model_name_for_query": "Qwen/Qwen2.5-32B-Instruct", "GeneralKnowledge": 61.73, "GSM8K": 50.1, "DC-Homograph": 67.59, "MC-Homograph": 91.47, "PiQA": 83.98, "Proverb-Quiz": 63.24, "VerbEval": 54.58, "Winogrande": 80.07, "Arc-Challenge": 85.15, "Arc-Easy": 91.87, "Feqh": 38.86, "Hallucination (Truthfulness)": 59.22, "P-Hellaswag": 82.07, "Law": 42.33, "AUT Multiple Choice": 50.4, "Parsi Literature": 40.41, "BoolQA": 93.4, "Reading Comprehension": 7.4, "PartExpert": 46.78, "MMLU Pro": 37.4, "Iranian Social Norms": 70.0, "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "5ede1c97bbab6ce5cda5812749b4c0bdf79b18dd", "Hub License": "apache-2.0"}
9
+ {"Model": "gemma-3-27b-it", "model_name_for_query": "google/gemma-3-27b-it", "GeneralKnowledge": 73.72, "GSM8K": 28.3, "DC-Homograph": 63.89, "MC-Homograph": 92.4, "PiQA": 87.29, "Proverb-Quiz": 78.92, "VerbEval": 66.02, "Winogrande": 78.12, "Arc-Challenge": 88.35, "Arc-Easy": 94.22, "Feqh": 24.57, "Hallucination (Truthfulness)": 60.15, "P-Hellaswag": 83.39, "Law": 36.33, "AUT Multiple Choice": 55.2, "Parsi Literature": 40.93, "BoolQA": 91.4, "Reading Comprehension": 1.2, "PartExpert": 49.32, "MMLU Pro": 36.6, "Iranian Social Norms": 70.49, "#Params (B)": 27.43, "Precision": "BF16", "Model sha": "005ad3404e59d6023443cb575daa05336842228a", "Hub License": "gemma"}
10
+ {"Model": "Qwen3-32B", "model_name_for_query": "Qwen/Qwen3-32B", "GeneralKnowledge": 67.6, "GSM8K": 37.9, "DC-Homograph": 51.85, "MC-Homograph": 89.63, "PiQA": 87.69, "Proverb-Quiz": 64.59, "VerbEval": 56.35, "Winogrande": 71.48, "Arc-Challenge": 91.13, "Arc-Easy": 94.22, "Feqh": 29.71, "Hallucination (Truthfulness)": 47.5, "P-Hellaswag": 83.47, "Law": 37.0, "AUT Multiple Choice": 48.3, "Parsi Literature": 39.12, "BoolQA": 91.1, "Reading Comprehension": 22.0, "PartExpert": 50.06, "MMLU Pro": 42.8, "Iranian Social Norms": 73.48, "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "d47b0d4ae4b48fde975756bf360a63a9cca8d470", "Hub License": "apache-2.0"}
11
+ {"Model": "QwQ-32B-Preview", "model_name_for_query": "Qwen/QwQ-32B-Preview", "GeneralKnowledge": 63.27, "GSM8K": 34.7, "DC-Homograph": 61.11, "MC-Homograph": 88.25, "PiQA": 81.28, "Proverb-Quiz": 58.11, "VerbEval": 51.97, "Winogrande": 75.64, "Arc-Challenge": 85.58, "Arc-Easy": 91.44, "Feqh": 41.14, "Hallucination (Truthfulness)": 38.84, "P-Hellaswag": 84.13, "Law": 43.0, "AUT Multiple Choice": 50.6, "Parsi Literature": 39.77, "BoolQA": 88.5, "Reading Comprehension": 23.6, "PartExpert": 47.39, "MMLU Pro": 37.3, "Iranian Social Norms": 72.26, "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "91906fe41a48b6a89ce2970abfd1269eefee170e", "Hub License": "apache-2.0"}
12
+ {"Model": "gemma-3-12b-it", "model_name_for_query": "google/gemma-3-12b-it", "GeneralKnowledge": 68.37, "GSM8K": 20.2, "DC-Homograph": 67.59, "MC-Homograph": 91.24, "PiQA": 87.19, "Proverb-Quiz": 72.97, "VerbEval": 63.39, "Winogrande": 73.96, "Arc-Challenge": 83.33, "Arc-Easy": 93.26, "Feqh": 25.14, "Hallucination (Truthfulness)": 46.1, "P-Hellaswag": 83.17, "Law": 36.33, "AUT Multiple Choice": 49.0, "Parsi Literature": 40.03, "BoolQA": 87.6, "Reading Comprehension": 4.5, "PartExpert": 44.12, "MMLU Pro": 32.6, "Iranian Social Norms": 75.55, "#Params (B)": 12.18, "Precision": "BF16", "Model sha": "96b6f1eccf38110c56df3a15bffe176da04bfd80", "Hub License": "gemma"}
13
+ {"Model": "gemma-2-27b-it", "model_name_for_query": "google/gemma-2-27b-it", "GeneralKnowledge": 68.11, "GSM8K": 26.7, "DC-Homograph": 60.19, "MC-Homograph": 91.24, "PiQA": 89.69, "Proverb-Quiz": 73.51, "VerbEval": 61.16, "Winogrande": 76.44, "Arc-Challenge": 86.75, "Arc-Easy": 94.22, "Feqh": 24.0, "Hallucination (Truthfulness)": 13.05, "P-Hellaswag": 83.69, "Law": 34.67, "AUT Multiple Choice": 50.8, "Parsi Literature": 35.91, "BoolQA": 89.8, "Reading Comprehension": 0.1, "PartExpert": 46.6, "MMLU Pro": 36.9, "Iranian Social Norms": 77.38, "#Params (B)": 27.22, "Precision": "BF16", "Model sha": "aaf20e6b9f4c0fcf043f6fb2a2068419086d77b0", "Hub License": "gemma"}
14
+ {"Model": "aya-expanse-32b", "model_name_for_query": "CohereLabs/aya-expanse-32b", "GeneralKnowledge": 73.72, "GSM8K": 17.5, "DC-Homograph": 62.96, "MC-Homograph": 87.56, "PiQA": 91.19, "Proverb-Quiz": 77.03, "VerbEval": 61.95, "Winogrande": 70.5, "Arc-Challenge": 85.15, "Arc-Easy": 93.37, "Feqh": 37.14, "Hallucination (Truthfulness)": 44.84, "P-Hellaswag": 81.7, "Law": 38.67, "AUT Multiple Choice": 54.7, "Parsi Literature": 34.75, "BoolQA": 89.7, "Reading Comprehension": 24.9, "PartExpert": 44.29, "MMLU Pro": 32.1, "Iranian Social Norms": 74.94, "#Params (B)": 32.29, "Precision": "F16", "Model sha": "94bda1dcb97d260f732d230b832c7c685ae91e23", "Hub License": "cc-by-nc-4.0"}
15
+ {"Model": "QwQ-32B", "model_name_for_query": "Qwen/QwQ-32B", "GeneralKnowledge": 60.71, "GSM8K": 29.3, "DC-Homograph": 58.33, "MC-Homograph": 88.25, "PiQA": 81.68, "Proverb-Quiz": 59.19, "VerbEval": 52.31, "Winogrande": 73.07, "Arc-Challenge": 84.94, "Arc-Easy": 90.8, "Feqh": 41.71, "Hallucination (Truthfulness)": 48.93, "P-Hellaswag": 82.22, "Law": 38.0, "AUT Multiple Choice": 49.3, "Parsi Literature": 37.71, "BoolQA": 88.5, "Reading Comprehension": 17.8, "PartExpert": 46.75, "MMLU Pro": 39.0, "Iranian Social Norms": 70.73, "#Params (B)": 32.76, "Precision": "BF16", "Model sha": "976055f8c83f394f35dbd3ab09a285a984907bd0", "Hub License": "apache-2.0"}
16
+ {"Model": "gpt-4.1-nano-2025-04-14", "model_name_for_query": null, "GeneralKnowledge": 68.11, "GSM8K": 58.4, "DC-Homograph": 49.07, "MC-Homograph": 78.11, "PiQA": 84.58, "Proverb-Quiz": 67.84, "VerbEval": 66.21, "Winogrande": 60.32, "Arc-Challenge": 81.41, "Arc-Easy": 91.55, "Feqh": 32.0, "Hallucination (Truthfulness)": 51.24, "P-Hellaswag": 77.96, "Law": 32.67, "AUT Multiple Choice": 46.1, "Parsi Literature": 36.42, "BoolQA": 81.7, "Reading Comprehension": 6.3, "PartExpert": 42.49, "MMLU Pro": 29.9, "Iranian Social Norms": 74.76, "#Params (B)": "unknown", "Precision": "unknown", "Model sha": "unknown", "Hub License": "unknown"}
17
+ {"Model": "Qwen3-14B", "model_name_for_query": "Qwen/Qwen3-14B", "GeneralKnowledge": 56.38, "GSM8K": 31.1, "DC-Homograph": 55.56, "MC-Homograph": 87.56, "PiQA": 77.18, "Proverb-Quiz": 53.78, "VerbEval": 54.36, "Winogrande": 67.32, "Arc-Challenge": 84.29, "Arc-Easy": 91.02, "Feqh": 29.14, "Hallucination (Truthfulness)": 44.54, "P-Hellaswag": 80.97, "Law": 34.67, "AUT Multiple Choice": 44.8, "Parsi Literature": 35.39, "BoolQA": 87.6, "Reading Comprehension": 24.4, "PartExpert": 43.22, "MMLU Pro": 35.5, "Iranian Social Norms": 74.51, "#Params (B)": 14.76, "Precision": "BF16", "Model sha": "8268fe3026cb304910457689366670e803a6fd56", "Hub License": "apache-2.0"}
18
+ {"Model": "gemma-2-9b-it", "model_name_for_query": "google/gemma-2-9b-it", "GeneralKnowledge": 64.03, "GSM8K": 17.4, "DC-Homograph": 59.26, "MC-Homograph": 90.55, "PiQA": 87.09, "Proverb-Quiz": 69.19, "VerbEval": 58.25, "Winogrande": 72.01, "Arc-Challenge": 84.29, "Arc-Easy": 93.16, "Feqh": 29.71, "Hallucination (Truthfulness)": 50.58, "P-Hellaswag": 80.82, "Law": 33.67, "AUT Multiple Choice": 48.5, "Parsi Literature": 38.1, "BoolQA": 89.7, "Reading Comprehension": 0.1, "PartExpert": 43.03, "MMLU Pro": 33.2, "Iranian Social Norms": 73.84, "#Params (B)": 9.24, "Precision": "BF16", "Model sha": "11c9b309abf73637e4b6f9a3fa1e92e615547819", "Hub License": "gemma"}
19
+ {"Model": "Qwen3-30B-A3B", "model_name_for_query": "Qwen/Qwen3-30B-A3B", "GeneralKnowledge": 65.05, "GSM8K": 28.8, "DC-Homograph": 57.41, "MC-Homograph": 86.41, "PiQA": 72.47, "Proverb-Quiz": 50.81, "VerbEval": 48.09, "Winogrande": 65.28, "Arc-Challenge": 87.39, "Arc-Easy": 93.58, "Feqh": 23.43, "Hallucination (Truthfulness)": 3.54, "P-Hellaswag": 83.1, "Law": 35.33, "AUT Multiple Choice": 48.0, "Parsi Literature": 36.55, "BoolQA": 86.2, "Reading Comprehension": 26.4, "PartExpert": 41.13, "MMLU Pro": 36.3, "Iranian Social Norms": 44.21, "#Params (B)": 30.53, "Precision": "BF16", "Model sha": "ae659febe817e4b3ebd7355f47792725801204c9", "Hub License": "apache-2.0"}
20
+ {"Model": "aya-23-35B", "model_name_for_query": "CohereLabs/aya-23-35B", "GeneralKnowledge": 63.27, "GSM8K": 10.0, "DC-Homograph": 55.56, "MC-Homograph": 83.64, "PiQA": 89.49, "Proverb-Quiz": 67.03, "VerbEval": 47.32, "Winogrande": 65.81, "Arc-Challenge": 77.56, "Arc-Easy": 90.16, "Feqh": 30.29, "Hallucination (Truthfulness)": 11.72, "P-Hellaswag": 79.87, "Law": 32.0, "AUT Multiple Choice": 48.7, "Parsi Literature": 31.92, "BoolQA": 86.2, "Reading Comprehension": 23.7, "PartExpert": 37.44, "MMLU Pro": 24.1, "Iranian Social Norms": 65.0, "#Params (B)": 34.98, "Precision": "F16", "Model sha": "5e72bd5ad83e5e1612ee7f56a0c1a439a7cfb887", "Hub License": "cc-by-nc-4.0"}
21
+ {"Model": "Qwen3-8B", "model_name_for_query": "Qwen/Qwen3-8B", "GeneralKnowledge": 49.23, "GSM8K": 25.7, "DC-Homograph": 50.93, "MC-Homograph": 82.95, "PiQA": 75.98, "Proverb-Quiz": 51.89, "VerbEval": 47.93, "Winogrande": 61.91, "Arc-Challenge": 80.24, "Arc-Easy": 87.38, "Feqh": 28.0, "Hallucination (Truthfulness)": 38.46, "P-Hellaswag": 80.38, "Law": 29.67, "AUT Multiple Choice": 46.0, "Parsi Literature": 33.2, "BoolQA": 86.4, "Reading Comprehension": 25.0, "PartExpert": 38.31, "MMLU Pro": 31.1, "Iranian Social Norms": 63.41, "#Params (B)": 8.19, "Precision": "BF16", "Model sha": "9c925d64d72725edaf899c6cb9c377fd0709d9c5", "Hub License": "apache-2.0"}
22
+ {"Model": "aya-expanse-8b", "model_name_for_query": "CohereLabs/aya-expanse-8b", "GeneralKnowledge": 58.67, "GSM8K": 9.8, "DC-Homograph": 51.85, "MC-Homograph": 80.65, "PiQA": 80.18, "Proverb-Quiz": 60.0, "VerbEval": 48.06, "Winogrande": 64.04, "Arc-Challenge": 71.47, "Arc-Easy": 84.6, "Feqh": 29.71, "Hallucination (Truthfulness)": 23.52, "P-Hellaswag": 76.49, "Law": 32.33, "AUT Multiple Choice": 45.8, "Parsi Literature": 34.49, "BoolQA": 82.3, "Reading Comprehension": 20.1, "PartExpert": 35.56, "MMLU Pro": 21.9, "Iranian Social Norms": 71.71, "#Params (B)": 8.02, "Precision": "F16", "Model sha": "0ad43ec1e309e1351faa4b1d22713c065e37359a", "Hub License": "cc-by-nc-4.0"}
23
+ {"Model": "Hormoz-8B", "model_name_for_query": "mann-e/Hormoz-8B", "GeneralKnowledge": 58.42, "GSM8K": 10.0, "DC-Homograph": 50.93, "MC-Homograph": 80.65, "PiQA": 80.68, "Proverb-Quiz": 60.27, "VerbEval": 47.29, "Winogrande": 64.39, "Arc-Challenge": 70.41, "Arc-Easy": 84.28, "Feqh": 28.57, "Hallucination (Truthfulness)": 23.66, "P-Hellaswag": 76.05, "Law": 30.33, "AUT Multiple Choice": 46.7, "Parsi Literature": 33.08, "BoolQA": 79.8, "Reading Comprehension": 19.6, "PartExpert": 35.68, "MMLU Pro": 21.5, "Iranian Social Norms": 70.3, "#Params (B)": 8.02, "Precision": "F32", "Model sha": "c91bcecb236c90523f70db7efa23dd794e9b4cff", "Hub License": "mit"}
24
+ {"Model": "Llama-3.1-8B-Instruct", "model_name_for_query": "meta-llama/Llama-3.1-8B-Instruct", "GeneralKnowledge": 52.55, "GSM8K": 12.0, "DC-Homograph": 43.52, "MC-Homograph": 79.03, "PiQA": 70.07, "Proverb-Quiz": 47.57, "VerbEval": 42.91, "Winogrande": 54.21, "Arc-Challenge": 68.91, "Arc-Easy": 80.11, "Feqh": 29.71, "Hallucination (Truthfulness)": 6.76, "P-Hellaswag": 79.79, "Law": 32.67, "AUT Multiple Choice": 44.9, "Parsi Literature": 32.3, "BoolQA": 82.7, "Reading Comprehension": 24.5, "PartExpert": 37.62, "MMLU Pro": 25.7, "Iranian Social Norms": 70.98, "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "0e9e39f249a16976918f6564b8830bc894c89659", "Hub License": "llama3.1"}
25
+ {"Model": "Qwen2.5-7B-Instruct", "model_name_for_query": "Qwen/Qwen2.5-7B-Instruct", "GeneralKnowledge": 51.02, "GSM8K": 18.0, "DC-Homograph": 52.78, "MC-Homograph": 79.26, "PiQA": 71.07, "Proverb-Quiz": 47.84, "VerbEval": 44.44, "Winogrande": 61.91, "Arc-Challenge": 72.33, "Arc-Easy": 81.5, "Feqh": 36.57, "Hallucination (Truthfulness)": 34.89, "P-Hellaswag": 74.8, "Law": 32.33, "AUT Multiple Choice": 42.6, "Parsi Literature": 31.27, "BoolQA": 82.5, "Reading Comprehension": 17.6, "PartExpert": 37.24, "MMLU Pro": 26.7, "Iranian Social Norms": 64.51, "#Params (B)": 7.61, "Precision": "BF16", "Model sha": "a09a35458c702b33eeacc393d103063234e8bc28", "Hub License": "apache-2.0"}
26
+ {"Model": "aya-23-8B", "model_name_for_query": "CohereLabs/aya-23-8B", "GeneralKnowledge": 52.3, "GSM8K": 6.1, "DC-Homograph": 52.78, "MC-Homograph": 76.27, "PiQA": 80.78, "Proverb-Quiz": 44.32, "VerbEval": 39.3, "Winogrande": 57.13, "Arc-Challenge": 63.68, "Arc-Easy": 81.39, "Feqh": 29.14, "Hallucination (Truthfulness)": 0.6, "P-Hellaswag": 75.83, "Law": 28.33, "AUT Multiple Choice": 42.9, "Parsi Literature": 31.27, "BoolQA": 72.3, "Reading Comprehension": 23.4, "PartExpert": 33.33, "MMLU Pro": 19.9, "Iranian Social Norms": 70.73, "#Params (B)": 8.02, "Precision": "F16", "Model sha": "2a1a63b24af8f591616fdf58936ee576d63ca835", "Hub License": "cc-by-nc-4.0"}
27
+ {"Model": "Qwen2-7B-Instruct", "model_name_for_query": "Qwen/Qwen2-7B-Instruct", "GeneralKnowledge": 52.04, "GSM8K": 14.5, "DC-Homograph": 54.63, "MC-Homograph": 72.81, "PiQA": 70.97, "Proverb-Quiz": 50.54, "VerbEval": 40.62, "Winogrande": 60.94, "Arc-Challenge": 69.12, "Arc-Easy": 80.75, "Feqh": 28.0, "Hallucination (Truthfulness)": 25.93, "P-Hellaswag": 76.71, "Law": 28.33, "AUT Multiple Choice": 40.4, "Parsi Literature": 31.4, "BoolQA": 79.0, "Reading Comprehension": 10.9, "PartExpert": 36.31, "MMLU Pro": 23.8, "Iranian Social Norms": 62.2, "#Params (B)": 7.61, "Precision": "BF16", "Model sha": "f2826a00ceef68f0f2b946d945ecc0477ce4450c", "Hub License": "apache-2.0"}
28
+ {"Model": "Meta-Llama-3-8B-Instruct", "model_name_for_query": "meta-llama/Meta-Llama-3-8B-Instruct", "GeneralKnowledge": 52.04, "GSM8K": 10.4, "DC-Homograph": 41.67, "MC-Homograph": 81.11, "PiQA": 70.97, "Proverb-Quiz": 42.97, "VerbEval": 38.93, "Winogrande": 56.95, "Arc-Challenge": 66.77, "Arc-Easy": 76.47, "Feqh": 33.71, "Hallucination (Truthfulness)": 33.23, "P-Hellaswag": 76.71, "Law": 32.0, "AUT Multiple Choice": 45.0, "Parsi Literature": 29.99, "BoolQA": 82.5, "Reading Comprehension": 19.4, "PartExpert": 36.3, "MMLU Pro": 26.0, "Iranian Social Norms": 70.06, "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "5f0b02c75b57c5855da9ae460ce51323ea669d8a", "Hub License": "llama3"}
29
+ {"Model": "gemma-3-4b-it", "model_name_for_query": "google/gemma-3-4b-it", "GeneralKnowledge": 45.92, "GSM8K": 9.6, "DC-Homograph": 42.59, "MC-Homograph": 72.58, "PiQA": 72.77, "Proverb-Quiz": 53.78, "VerbEval": 45.3, "Winogrande": 55.09, "Arc-Challenge": 63.46, "Arc-Easy": 79.57, "Feqh": 21.14, "Hallucination (Truthfulness)": 46.04, "P-Hellaswag": 73.84, "Law": 27.67, "AUT Multiple Choice": 42.5, "Parsi Literature": 30.24, "BoolQA": 78.6, "Reading Comprehension": 5.5, "PartExpert": 34.7, "MMLU Pro": 22.8, "Iranian Social Norms": 65.55, "#Params (B)": 4.3, "Precision": "BF16", "Model sha": "093f9f388b31de276ce2de164bdc2081324b9767", "Hub License": "gemma"}
30
+ {"Model": "Qwen3-4B", "model_name_for_query": "Qwen/Qwen3-4B", "GeneralKnowledge": 43.88, "GSM8K": 20.1, "DC-Homograph": 38.89, "MC-Homograph": 76.27, "PiQA": 66.07, "Proverb-Quiz": 45.41, "VerbEval": 41.23, "Winogrande": 54.56, "Arc-Challenge": 73.61, "Arc-Easy": 83.42, "Feqh": 30.29, "Hallucination (Truthfulness)": 25.29, "P-Hellaswag": 78.03, "Law": 30.33, "AUT Multiple Choice": 40.6, "Parsi Literature": 31.79, "BoolQA": 81.9, "Reading Comprehension": 21.3, "PartExpert": 37.28, "MMLU Pro": 28.9, "Iranian Social Norms": 68.72, "#Params (B)": 4.02, "Precision": "BF16", "Model sha": "531c80e289d6cff3a7cd8c0db8110231d23a6f7a", "Hub License": "apache-2.0"}
31
+ {"Model": "Hermes-3-Llama-3.1-8B", "model_name_for_query": "NousResearch/Hermes-3-Llama-3.1-8B", "GeneralKnowledge": 49.49, "GSM8K": 10.2, "DC-Homograph": 44.44, "MC-Homograph": 79.72, "PiQA": 70.37, "Proverb-Quiz": 47.84, "VerbEval": 48.94, "Winogrande": 55.18, "Arc-Challenge": 65.28, "Arc-Easy": 78.07, "Feqh": 30.29, "Hallucination (Truthfulness)": 45.2, "P-Hellaswag": 73.99, "Law": 31.67, "AUT Multiple Choice": 42.1, "Parsi Literature": 30.63, "BoolQA": 83.5, "Reading Comprehension": 13.5, "PartExpert": 35.61, "MMLU Pro": 24.1, "Iranian Social Norms": 54.88, "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "896ea440e5a9e6070e3d8a2774daf2b481ab425b", "Hub License": "llama3"}
32
+ {"Model": "Dorna2-Llama3.1-8B-Instruct", "model_name_for_query": "PartAI/Dorna2-Llama3.1-8B-Instruct", "GeneralKnowledge": 48.72, "GSM8K": 11.9, "DC-Homograph": 44.44, "MC-Homograph": 72.81, "PiQA": 69.97, "Proverb-Quiz": 42.97, "VerbEval": 42.06, "Winogrande": 54.47, "Arc-Challenge": 67.63, "Arc-Easy": 78.72, "Feqh": 33.71, "Hallucination (Truthfulness)": 33.91, "P-Hellaswag": 78.91, "Law": 29.67, "AUT Multiple Choice": 41.0, "Parsi Literature": 27.28, "BoolQA": 81.8, "Reading Comprehension": 21.9, "PartExpert": 35.65, "MMLU Pro": 22.7, "Iranian Social Norms": 49.82, "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "b78e4bd261100c96e511ed5090ca0ce0e1f4b340", "Hub License": "llama3.1"}
33
+ {"Model": "Llama-3.1-8B", "model_name_for_query": "meta-llama/Llama-3.1-8B", "GeneralKnowledge": 49.23, "GSM8K": 10.8, "DC-Homograph": 46.3, "MC-Homograph": 72.12, "PiQA": 66.47, "Proverb-Quiz": 35.95, "VerbEval": 39.91, "Winogrande": 54.92, "Arc-Challenge": 63.35, "Arc-Easy": 75.08, "Feqh": 30.86, "Hallucination (Truthfulness)": 45.02, "P-Hellaswag": 76.34, "Law": 33.0, "AUT Multiple Choice": 42.6, "Parsi Literature": 27.41, "BoolQA": 71.6, "Reading Comprehension": 19.7, "PartExpert": 35.92, "MMLU Pro": 22.2, "Iranian Social Norms": 61.83, "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", "Hub License": "llama3.1"}
34
+ {"Model": "Meta-Llama-3-8B", "model_name_for_query": "meta-llama/Meta-Llama-3-8B", "GeneralKnowledge": 47.7, "GSM8K": 10.3, "DC-Homograph": 41.67, "MC-Homograph": 74.42, "PiQA": 64.16, "Proverb-Quiz": 37.3, "VerbEval": 39.46, "Winogrande": 55.36, "Arc-Challenge": 62.07, "Arc-Easy": 75.83, "Feqh": 27.43, "Hallucination (Truthfulness)": 37.0, "P-Hellaswag": 76.49, "Law": 35.67, "AUT Multiple Choice": 42.5, "Parsi Literature": 28.19, "BoolQA": 75.2, "Reading Comprehension": 19.5, "PartExpert": 35.1, "MMLU Pro": 22.8, "Iranian Social Norms": 54.02, "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "8cde5ca8380496c9a6cc7ef3a8b46a0372a1d920", "Hub License": "llama3"}
35
+ {"Model": "Dorna-Llama3-8B-Instruct", "model_name_for_query": "PartAI/Dorna-Llama3-8B-Instruct", "GeneralKnowledge": 41.33, "GSM8K": 10.3, "DC-Homograph": 40.74, "MC-Homograph": 74.65, "PiQA": 66.17, "Proverb-Quiz": 35.41, "VerbEval": 34.74, "Winogrande": 56.16, "Arc-Challenge": 59.94, "Arc-Easy": 70.7, "Feqh": 29.14, "Hallucination (Truthfulness)": 31.49, "P-Hellaswag": 75.68, "Law": 25.33, "AUT Multiple Choice": 36.9, "Parsi Literature": 27.54, "BoolQA": 80.1, "Reading Comprehension": 21.8, "PartExpert": 34.49, "MMLU Pro": 22.0, "Iranian Social Norms": 69.39, "#Params (B)": 8.03, "Precision": "BF16", "Model sha": "fb268bb51b950b4db5b7c82c1b73d9e803020eed", "Hub License": "llama3"}
36
+ {"Model": "gemma-2-2b-it", "model_name_for_query": "google/gemma-2-2b-it", "GeneralKnowledge": 32.91, "GSM8K": 6.4, "DC-Homograph": 47.22, "MC-Homograph": 74.65, "PiQA": 66.87, "Proverb-Quiz": 45.68, "VerbEval": 36.18, "Winogrande": 54.74, "Arc-Challenge": 57.91, "Arc-Easy": 70.48, "Feqh": 25.71, "Hallucination (Truthfulness)": 39.02, "P-Hellaswag": 69.88, "Law": 32.67, "AUT Multiple Choice": 36.9, "Parsi Literature": 30.76, "BoolQA": 72.4, "Reading Comprehension": 0.3, "PartExpert": 31.31, "MMLU Pro": 18.2, "Iranian Social Norms": 40.18, "#Params (B)": 2.61, "Precision": "BF16", "Model sha": "299a8560bedf22ed1c72a8a11e7dce4a7f9f51f8", "Hub License": "gemma"}
37
+ {"Model": "PersianMind-v1.0", "model_name_for_query": "universitytehran/PersianMind-v1.0", "GeneralKnowledge": 30.61, "GSM8K": 2.3, "DC-Homograph": 41.67, "MC-Homograph": 65.9, "PiQA": 59.76, "Proverb-Quiz": 34.32, "VerbEval": 26.26, "Winogrande": 52.17, "Arc-Challenge": 54.59, "Arc-Easy": 69.73, "Feqh": 26.29, "Hallucination (Truthfulness)": 2.37, "P-Hellaswag": 63.78, "Law": 27.33, "AUT Multiple Choice": 36.1, "Parsi Literature": 27.8, "BoolQA": 66.3, "Reading Comprehension": "-", "PartExpert": 29.75, "MMLU Pro": 14.5, "Iranian Social Norms": 48.41, "#Params (B)": 0.0, "Precision": "F32", "Model sha": "af603eeb074138e2a613fbc95d89f018afbd3041", "Hub License": "cc-by-nc-sa-4.0"}
38
+ {"Model": "gemma-3-1b-it", "model_name_for_query": "google/gemma-3-1b-it", "GeneralKnowledge": 26.02, "GSM8K": 4.3, "DC-Homograph": 49.07, "MC-Homograph": 51.15, "PiQA": 57.66, "Proverb-Quiz": 28.92, "VerbEval": 27.67, "Winogrande": 50.58, "Arc-Challenge": 36.43, "Arc-Easy": 46.1, "Feqh": 28.0, "Hallucination (Truthfulness)": 54.94, "P-Hellaswag": 63.92, "Law": 20.33, "AUT Multiple Choice": 29.1, "Parsi Literature": 24.97, "BoolQA": 63.9, "Reading Comprehension": 2.1, "PartExpert": 27.22, "MMLU Pro": 13.7, "Iranian Social Norms": 51.22, "#Params (B)": 0.99, "Precision": "BF16", "Model sha": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", "Hub License": "gemma"}
39
+ {"Model": "Llama-3.2-1B-Instruct", "model_name_for_query": "meta-llama/Llama-3.2-1B-Instruct", "GeneralKnowledge": 29.59, "GSM8K": 4.1, "DC-Homograph": 50.93, "MC-Homograph": 52.53, "PiQA": 54.05, "Proverb-Quiz": 28.65, "VerbEval": 26.11, "Winogrande": 49.07, "Arc-Challenge": 37.5, "Arc-Easy": 47.38, "Feqh": 31.43, "Hallucination (Truthfulness)": 3.34, "P-Hellaswag": 55.4, "Law": 24.0, "AUT Multiple Choice": 29.9, "Parsi Literature": 27.03, "BoolQA": 64.1, "Reading Comprehension": 7.2, "PartExpert": 28.59, "MMLU Pro": 15.7, "Iranian Social Norms": 37.44, "#Params (B)": 1.23, "Precision": "BF16", "Model sha": "9213176726f574b556790deb65791e0c5aa438b6", "Hub License": "llama3.2"}
40
+ {"Model": "Maral-7B-alpha-1", "model_name_for_query": "MaralGPT/Maral-7B-alpha-1", "GeneralKnowledge": 31.63, "GSM8K": 6.1, "DC-Homograph": 43.52, "MC-Homograph": 47.47, "PiQA": 51.95, "Proverb-Quiz": 22.16, "VerbEval": 28.96, "Winogrande": 49.42, "Arc-Challenge": 37.29, "Arc-Easy": 43.1, "Feqh": 26.29, "Hallucination (Truthfulness)": 0.0, "P-Hellaswag": 60.18, "Law": 26.33, "AUT Multiple Choice": 28.4, "Parsi Literature": 26.77, "BoolQA": 62.7, "Reading Comprehension": 10.8, "PartExpert": 27.1, "MMLU Pro": 14.8, "Iranian Social Norms": 24.63, "#Params (B)": 7.24, "Precision": "BF16", "Model sha": "2ab5ca2a0d1a4454a78b4ca911e595bb9da2fe2f", "Hub License": "mit"}