diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -1,14166 +1,57002 @@ -{ - "tasks": [ +[ + [ { - "task": "classification", - "metric": "accuracy", - "score": 0.5353333333333333, - "bcp_47": 28, - "model": 12 + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5679608237702286, + "sentence_nr": 0 }, { - "task": "language_modeling", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9172787041570468, - "bcp_47": 28, - "model": 12 - }, + "score": 0.746881923400435, + "sentence_nr": 0 + } + ], + [ { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.31440758611811165, - "bcp_47": 28, - "model": 12 + "score": 0.4438455475739657, + "sentence_nr": 0 }, { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.4776446463431595, - "bcp_47": 28, - "model": 12 + "score": 0.6320800718582147, + "sentence_nr": 0 } ], - "models": [ + [ { - "model": "amazon/nova-micro-v1", - "task": "classification", - "metric": "accuracy", - "score": 0.5166666666666666, - "bcp_47": 2 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5894973558751632, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "task": "language_modeling", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9358428899682786, - "bcp_47": 2 - }, + "score": 0.7562097956860054, + "sentence_nr": 0 + } + ], + [ { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.40042093531509637, - "bcp_47": 2 + "score": 0.3846086976522069, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5642142196700637, - "bcp_47": 2 - }, + "score": 0.5835344719191324, + "sentence_nr": 0 + } + ], + [ { - "model": "google/gemini-2.0-flash-001", - "task": "classification", - "metric": "accuracy", - "score": 0.8666666666666667, - "bcp_47": 2 + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4804215535486392, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "task": "language_modeling", + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9585325034195884, - "bcp_47": 2 - }, + "score": 0.6694735319785804, + "sentence_nr": 0 + } + ], + [ { - "model": "google/gemini-2.0-flash-001", + "model": "mistralai/mistral-nemo", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.4523562354788243, - "bcp_47": 2 + "score": 0.2511517944602615, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", + "model": "mistralai/mistral-nemo", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5828490054615683, - "bcp_47": 2 - }, + "score": 0.4484633445384819, + "sentence_nr": 0 + } + ], + [ { - "model": "google/gemini-2.0-flash-lite-001", - "task": "classification", - "metric": "accuracy", - "score": 0.7333333333333333, - "bcp_47": 2 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5820808184424484, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "task": "language_modeling", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9574729426945592, - "bcp_47": 2 - }, + "score": 0.73788733854976, + "sentence_nr": 0 + } + ], + [ { "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.40085159165111883, - "bcp_47": 2 + "score": 0.5749603738163459, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5422821788946908, - "bcp_47": 2 - }, - { - "model": "google/gemma-3-27b-it", - "task": "classification", - "metric": "accuracy", - "score": 0.7166666666666666, - "bcp_47": 2 - }, - { - "model": "google/gemma-3-27b-it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9402106879094536, - "bcp_47": 2 - }, + "score": 0.7240488251574404, + "sentence_nr": 0 + } + ], + [ { "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.3748623797480871, - "bcp_47": 2 + "score": 0.5617561349997696, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5376336154503363, - "bcp_47": 2 - }, + "score": 0.7132694856647042, + "sentence_nr": 0 + } + ], + [ { - "model": "meta-llama/llama-3-70b-instruct", - "task": "classification", - "metric": "accuracy", - "score": 0.8, - "bcp_47": 2 + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2963216580569375, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "task": "language_modeling", + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9555990324827045, - "bcp_47": 2 - }, + "score": 0.5101500486835966, + "sentence_nr": 0 + } + ], + [ { - "model": "meta-llama/llama-3-70b-instruct", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.25148401884229143, - "bcp_47": 2 + "score": 0.15317719477157257, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.4285750600098188, - "bcp_47": 2 - }, + "score": 0.38800976493585004, + "sentence_nr": 0 + } + ], + [ { - "model": "meta-llama/llama-3.1-70b-instruct", - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "bcp_47": 2 + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6001453932849357, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "task": "language_modeling", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9458265879125298, - "bcp_47": 2 - }, + "score": 0.762029391170019, + "sentence_nr": 0 + } + ], + [ { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.4318584195195329, - "bcp_47": 2 + "score": 0.30676942927198475, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5679592059634284, - "bcp_47": 2 - }, + "score": 0.4968492831219663, + "sentence_nr": 0 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "task": "classification", - "metric": "accuracy", - "score": 0.5142857142857143, - "bcp_47": 28 + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32063971770635635, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "task": "language_modeling", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9422717613037961, - "bcp_47": 28 - }, + "score": 0.5206258401513325, + "sentence_nr": 0 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.28045810258852616, - "bcp_47": 28 + "score": 0.39086127104761287, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.45694310450071773, - "bcp_47": 28 - }, + "score": 0.6239956806265569, + "sentence_nr": 0 + } + ], + [ { - "model": "microsoft/phi-4-multimodal-instruct", - "task": "classification", - "metric": "accuracy", - "score": 0.4166666666666667, - "bcp_47": 2 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3020679767949182, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "task": "language_modeling", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.8811352896333067, - "bcp_47": 2 - }, + "score": 0.5246291817407542, + "sentence_nr": 0 + } + ], + [ { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.31733056990581465, - "bcp_47": 2 + "score": 0.29261990846502584, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.45631576469060464, - "bcp_47": 2 - }, + "score": 0.5207965578474395, + "sentence_nr": 0 + } + ], + [ { "model": "mistralai/mistral-nemo", - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "bcp_47": 2 + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23343658187420896, + "sentence_nr": 0 }, { "model": "mistralai/mistral-nemo", - "task": "language_modeling", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.8612477844203897, - "bcp_47": 2 - }, + "score": 0.5188968707275573, + "sentence_nr": 0 + } + ], + [ { - "model": "mistralai/mistral-nemo", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.3177444138044378, - "bcp_47": 2 + "score": 0.2920008662633279, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.49319228717306784, - "bcp_47": 2 - }, + "score": 0.47119207959541226, + "sentence_nr": 0 + } + ], + [ { - "model": "mistralai/mistral-small-24b-instruct-2501", - "task": "classification", - "metric": "accuracy", - "score": 0.55, - "bcp_47": 2 + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2596939072050362, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "task": "language_modeling", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.8782400543225595, - "bcp_47": 2 - }, + "score": 0.4394574387008692, + "sentence_nr": 0 + } + ], + [ { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.37837115628691054, - "bcp_47": 2 + "score": 0.4273817965049865, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5213024503486642, - "bcp_47": 2 - }, + "score": 0.6016204186733703, + "sentence_nr": 0 + } + ], + [ { - "model": "openai/gpt-4o-mini", - "task": "classification", - "metric": "accuracy", - "score": 0.5166666666666666, - "bcp_47": 2 + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "task": "language_modeling", + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9560369064537906, - "bcp_47": 2 - }, + "score": 0.0, + "sentence_nr": 0 + } + ], + [ { - "model": "openai/gpt-4o-mini", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.3940120225834043, - "bcp_47": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5452510379336759, - "bcp_47": 2 - }, - { - "model": "qwen/qwq-32b", - "task": "classification", - "metric": "accuracy", "score": 0.0, - "bcp_47": 2 - }, - { - "model": "qwen/qwq-32b", - "task": "language_modeling", - "metric": "chrf", - "score": 0.47001826645586636, - "bcp_47": 2 - }, + "sentence_nr": 0 + } + ], + [ { - "model": "qwen/qwq-32b", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.2144844735779058, - "bcp_47": 2 + "score": 0.2777551012631926, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.30433786997302065, - "bcp_47": 2 + "score": 0.49423240120783246, + "sentence_nr": 0 } ], - "languages": [ - { - "bcp_47": "aa", - "speakers": 2119663, - "language_name": "Afar", - "autonym": "Afar", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "aa", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + [ { - "bcp_47": "ab", - "speakers": 91953, - "language_name": "Abkhazian", - "autonym": "Аԥсшәа", - "family": "Abkhaz-Adyge", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 67.0, - "commonvoice_locale": "ab", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "abr", - "speakers": 1467010, - "language_name": "Abron", - "autonym": "Abron", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.7964573357809173, + "sentence_nr": 0 }, { - "bcp_47": "ace", - "speakers": 3738364, - "language_name": "Achinese", - "autonym": "Achinese", - "family": "Austronesian", - "flores_path": "ace_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ace", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ach", - "speakers": 1600361, - "language_name": "Acoli", - "autonym": "Acoli", - "family": "Nilotic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ada", - "speakers": 880206, - "language_name": "Adangme", - "autonym": "Adangme", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ady", - "speakers": 444583, - "language_name": "Adyghe", - "autonym": "Adyghe", - "family": "Abkhaz-Adyge", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 12.0, - "commonvoice_locale": "ady", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "aeb", - "speakers": 10549080, - "language_name": "Tunisian Arabic", - "autonym": "Tunisian Arabic", - "family": "Afro-Asiatic", - "flores_path": "aeb_Arab", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "af", - "speakers": 9318845, - "language_name": "Afrikaans", - "autonym": "Afrikaans", - "family": "Indo-European", - "flores_path": "afr_Latn", - "fleurs_tag": "af_za", - "commonvoice_hours": 0.5, - "commonvoice_locale": "af", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "agq", - "speakers": 38843, - "language_name": "Aghem", - "autonym": "Aghem", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ak", - "speakers": 11442678, - "language_name": "Akan", - "autonym": "Akan", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.1, - "commonvoice_locale": "tw", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "aln", - "speakers": 1430250, - "language_name": "Gheg Albanian", - "autonym": "Gheg Albanian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "aln", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "alt", - "speakers": 19841, - "language_name": "Southern Altai", - "autonym": "Southern Altai", - "family": "Turkic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "am", - "speakers": 35728475, - "language_name": "Amharic", - "autonym": "አማርኛ", - "family": "Afro-Asiatic", - "flores_path": "amh_Ethi", - "fleurs_tag": "am_et", - "commonvoice_hours": 1.8, - "commonvoice_locale": "am", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "amo", - "speakers": 18620, - "language_name": "Amo", - "autonym": "Amo", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "an", - "speakers": 26008, - "language_name": "Aragonese", - "autonym": "Aragonés", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 14.0, - "commonvoice_locale": "an", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ann", - "speakers": 0, - "language_name": "Obolo", - "autonym": "Obolo", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "aoz", - "speakers": 720970, - "language_name": "Uab Meto", - "autonym": "Uab Meto", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "apc", - "speakers": 39031474, - "language_name": "North Levantine Arabic", - "autonym": "العامية", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.8458636471716781, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "ar", - "speakers": 351664197, - "language_name": "Arabic", - "autonym": "العربية", - "family": "Afro-Asiatic", - "flores_path": "arb_Arab", - "fleurs_tag": "ar_eg", - "commonvoice_hours": 92.0, - "commonvoice_locale": "ar", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "model": 1.0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.34633672321253084, + "sentence_nr": 0 }, { - "bcp_47": "ar", - "speakers": 351664197, - "language_name": "Arabic", - "autonym": "العربية", - "family": "Afro-Asiatic", - "flores_path": "arb_Arab", - "fleurs_tag": "ar_eg", - "commonvoice_hours": 92.0, - "commonvoice_locale": "ar", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", "metric": "chrf", - "score": 0.9392314289764625, - "model": 1.0 - }, + "score": 0.5378805625051344, + "sentence_nr": 0 + } + ], + [ { + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "speakers": 351664197, - "language_name": "Arabic", - "autonym": "العربية", - "family": "Afro-Asiatic", - "flores_path": "arb_Arab", - "fleurs_tag": "ar_eg", - "commonvoice_hours": 92.0, - "commonvoice_locale": "ar", - "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.2837250166554738, - "model": 1.0 + "score": 0.3582301850807646, + "sentence_nr": 0 }, { + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", - "speakers": 351664197, - "language_name": "Arabic", - "autonym": "العربية", - "family": "Afro-Asiatic", - "flores_path": "arb_Arab", - "fleurs_tag": "ar_eg", - "commonvoice_hours": 92.0, - "commonvoice_locale": "ar", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4684314458952127, - "model": 1.0 - }, - { - "bcp_47": "arn", - "speakers": 272802, - "language_name": "Mapuche", - "autonym": "Mapudungun", - "family": "Araucanian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "arn", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "aro", - "speakers": 105, - "language_name": "Araona", - "autonym": "Araona", - "family": "Pano-Tacanan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "arq", - "speakers": 35667507, - "language_name": "Algerian Arabic", - "autonym": "Algerian Arabic", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ars", - "speakers": 1025205, - "language_name": "Najdi Arabic", - "autonym": "Najdi Arabic", - "family": "Afro-Asiatic", - "flores_path": "ars_Arab", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ary", - "speakers": 30938679, - "language_name": "Moroccan Arabic", - "autonym": "Moroccan Arabic", - "family": "Afro-Asiatic", - "flores_path": "ary_Arab", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "task": "translation", + "metric": "chrf", + "score": 0.5380305837807603, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "arz", - "speakers": 66639360, - "language_name": "Egyptian Arabic", - "autonym": "Egyptian Arabic", - "family": "Afro-Asiatic", - "flores_path": "arz_Arab", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "model": 1.0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.300740577257699, + "sentence_nr": 0 }, { - "bcp_47": "arz", - "speakers": 66639360, - "language_name": "Egyptian Arabic", - "autonym": "Egyptian Arabic", - "family": "Afro-Asiatic", - "flores_path": "arz_Arab", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", "metric": "chrf", - "score": 0.930329195667362, - "model": 1.0 - }, + "score": 0.5272774705181614, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "arz", - "speakers": 66639360, - "language_name": "Egyptian Arabic", - "autonym": "Egyptian Arabic", - "family": "Afro-Asiatic", - "flores_path": "arz_Arab", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.19793415292805128, - "model": 1.0 + "score": 0.3099603853356145, + "sentence_nr": 0 }, { - "bcp_47": "arz", - "speakers": 66639360, - "language_name": "Egyptian Arabic", - "autonym": "Egyptian Arabic", - "family": "Afro-Asiatic", - "flores_path": "arz_Arab", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.3881278724939126, - "model": 1.0 - }, - { - "bcp_47": "as", - "speakers": 17239170, - "language_name": "Assamese", - "autonym": "অসমীয়া", - "family": "Indo-European", - "flores_path": "asm_Beng", - "fleurs_tag": "as_in", - "commonvoice_hours": 2.8, - "commonvoice_locale": "as", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "asa", - "speakers": 702634, - "language_name": "Asu", - "autonym": "Kipare", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ast", - "speakers": 650205, - "language_name": "Asturian", - "autonym": "Asturianu", - "family": "Indo-European", - "flores_path": "ast_Latn", - "fleurs_tag": "ast_es", - "commonvoice_hours": 0.8, - "commonvoice_locale": "ast", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "atj", - "speakers": 6408, - "language_name": "Atikamekw", - "autonym": "Atikamekw", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "av", - "speakers": 552716, - "language_name": "Avaric", - "autonym": "Avaric", - "family": "Nakh-Daghestanian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "awa", - "speakers": 25862924, - "language_name": "Awadhi", - "autonym": "Awadhi", - "family": "Indo-European", - "flores_path": "awa_Deva", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ay", - "speakers": 2838620, - "language_name": "Aymara", - "autonym": "Aymara", - "family": "Aymaran", - "flores_path": "ayr_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "az", - "speakers": 32446682, - "language_name": "Azerbaijani", - "autonym": "Azərbaycan", - "family": "Turkic", - "flores_path": "azj_Latn", - "fleurs_tag": "az_az", - "commonvoice_hours": 0.5, - "commonvoice_locale": "az", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ba", - "speakers": 1842386, - "language_name": "Bashkir", - "autonym": "Башҡорт Теле", - "family": "Turkic", - "flores_path": "bak_Cyrl", - "fleurs_tag": null, - "commonvoice_hours": 259.0, - "commonvoice_locale": "ba", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bal", - "speakers": 8227887, - "language_name": "Baluchi", - "autonym": "بلۆچی", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "bal", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ban", - "speakers": 4806468, - "language_name": "Balinese", - "autonym": "Balinese", - "family": "Austronesian", - "flores_path": "ban_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bap", - "speakers": 454918, - "language_name": "Bantawa", - "autonym": "Bantawa", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bar", - "speakers": 22043627, - "language_name": "Bavarian", - "autonym": "Bavarian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bas", - "speakers": 332940, - "language_name": "Basaa", - "autonym": "Ɓàsàa", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 12.0, - "commonvoice_locale": "bas", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bax", - "speakers": 332940, - "language_name": "Bamun", - "autonym": "Bamun", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 11.0, - "commonvoice_locale": "bax", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bbc", - "speakers": 2456639, - "language_name": "Batak Toba", - "autonym": "Batak Toba", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bbj", - "speakers": 388430, - "language_name": "Ghomala", - "autonym": "Ghomala", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 13.0, - "commonvoice_locale": "bbj", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bci", - "speakers": 3022921, - "language_name": "Baoulé", - "autonym": "Baoulé", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 2.0, - "commonvoice_locale": "bci", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "be", - "speakers": 10064517, - "language_name": "Belarusian", - "autonym": "Беларуская", - "family": "Indo-European", - "flores_path": "bel_Cyrl", - "fleurs_tag": "be_by", - "commonvoice_hours": 1805.0, - "commonvoice_locale": "be", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bej", - "speakers": 2460326, - "language_name": "Beja", - "autonym": "Beja", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bem", - "speakers": 5402246, - "language_name": "Bemba", - "autonym": "Ichibemba", - "family": "Atlantic-Congo", - "flores_path": "bem_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bew", - "speakers": 5607546, - "language_name": "Betawi", - "autonym": "Betawi", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "bew", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bez", - "speakers": 995398, - "language_name": "Bena", - "autonym": "Hibena", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bfd", - "speakers": 158146, - "language_name": "Bafut", - "autonym": "Bafut", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 11.0, - "commonvoice_locale": "bfd", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bfq", - "speakers": 305001, - "language_name": "Badaga", - "autonym": "Badaga", - "family": "Dravidian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bft", - "speakers": 502520, - "language_name": "Balti", - "autonym": "Balti", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 18.0, - "commonvoice_locale": "bft", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bfy", - "speakers": 654424, - "language_name": "Bagheli", - "autonym": "Bagheli", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bg", - "speakers": 7878315, - "language_name": "Bulgarian", - "autonym": "Български", - "family": "Indo-European", - "flores_path": "bul_Cyrl", - "fleurs_tag": "bg_bg", - "commonvoice_hours": 16.0, - "commonvoice_locale": "bg", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bgc", - "speakers": 15913080, - "language_name": "Haryanvi", - "autonym": "हरियाणवी", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bgn", - "speakers": 2037382, - "language_name": "Western Balochi", - "autonym": "بلوچی (رخشانی)", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bgx", - "speakers": 377280, - "language_name": "Balkan Gagauz Turkish", - "autonym": "Balkan Gagauz Turkish", - "family": "Turkic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bhb", - "speakers": 1591308, - "language_name": "Bhili", - "autonym": "Bhili", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bhi", - "speakers": 1220003, - "language_name": "Bhilali", - "autonym": "Bhilali", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bho", - "speakers": 32934797, - "language_name": "Bhojpuri", - "autonym": "भोजपुरी", - "family": "Indo-European", - "flores_path": "bho_Deva", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bi", - "speakers": 268500, - "language_name": "Bislama", - "autonym": "Bislama", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bik", - "speakers": 3275430, - "language_name": "Bikol", - "autonym": "Bikol", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bin", - "speakers": 1519599, - "language_name": "Bini", - "autonym": "Bini", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bjj", - "speakers": 7426104, - "language_name": "Kanauji", - "autonym": "Kanauji", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bjn", - "speakers": 4010288, - "language_name": "Banjar", - "autonym": "Banjar", - "family": "Austronesian", - "flores_path": "bjn_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bjt", - "speakers": 95992, - "language_name": "Balanta-Ganja", - "autonym": "Balanta-Ganja", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bkm", - "speakers": 360685, - "language_name": "Kom", - "autonym": "Kom", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 13.0, - "commonvoice_locale": "bkm", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bku", - "speakers": 7970, - "language_name": "Buhid", - "autonym": "Buhid", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bla", - "speakers": 4900, - "language_name": "Siksiká", - "autonym": "Siksiká", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "blo", - "speakers": 51507, - "language_name": "Anii", - "autonym": "Anii Kagɩja", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "blt", - "speakers": 681177, - "language_name": "Tai Dam", - "autonym": "ꪼꪕꪒꪾ", - "family": "Tai-Kadai", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bm", - "speakers": 9385632, - "language_name": "Bambara", - "autonym": "Bamanakan", - "family": "Mande", - "flores_path": "bam_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "bm", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bmq", - "speakers": 168159, - "language_name": "Bomu", - "autonym": "Bomu", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5209233176748354, + "sentence_nr": 0 + } + ], + [ { + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "speakers": 267193288, - "language_name": "Bangla", - "autonym": "বাংলা", - "family": "Indo-European", - "flores_path": "ben_Beng", - "fleurs_tag": "bn_in", - "commonvoice_hours": 49.0, - "commonvoice_locale": "bn", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "model": 1.0 + "task": "translation", + "metric": "bleu", + "score": 0.35580399268816465, + "sentence_nr": 0 }, { + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", - "speakers": 267193288, - "language_name": "Bangla", - "autonym": "বাংলা", - "family": "Indo-European", - "flores_path": "ben_Beng", - "fleurs_tag": "bn_in", - "commonvoice_hours": 49.0, - "commonvoice_locale": "bn", - "in_benchmark": true, - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.9097658392566466, - "model": 1.0 - }, + "score": 0.5392592206305507, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "bn", - "speakers": 267193288, - "language_name": "Bangla", - "autonym": "বাংলা", - "family": "Indo-European", - "flores_path": "ben_Beng", - "fleurs_tag": "bn_in", - "commonvoice_hours": 49.0, - "commonvoice_locale": "bn", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.20360260890869705, - "model": 1.0 + "score": 0.39317381456022266, + "sentence_nr": 0 }, { - "bcp_47": "bn", - "speakers": 267193288, - "language_name": "Bangla", - "autonym": "বাংলা", - "family": "Indo-European", - "flores_path": "ben_Beng", - "fleurs_tag": "bn_in", - "commonvoice_hours": 49.0, - "commonvoice_locale": "bn", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4076175886917154, - "model": 1.0 - }, - { - "bcp_47": "bo", - "speakers": 3006697, - "language_name": "Tibetan", - "autonym": "བོད་སྐད་", - "family": "Sino-Tibetan", - "flores_path": "bod_Tibt", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "bo", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bpy", - "speakers": 90174, - "language_name": "Bishnupriya", - "autonym": "Bishnupriya", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bqi", - "speakers": 1188926, - "language_name": "Bakhtiari", - "autonym": "Bakhtiari", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bqv", - "speakers": 46718, - "language_name": "Koro Wachi", - "autonym": "Koro Wachi", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "br", - "speakers": 563140, - "language_name": "Breton", - "autonym": "Brezhoneg", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 27.0, - "commonvoice_locale": "br", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bra", - "speakers": 54370, - "language_name": "Braj", - "autonym": "Braj", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "brh", - "speakers": 3035513, - "language_name": "Brahui", - "autonym": "Brahui", - "family": "Dravidian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "brx", - "speakers": 1856526, - "language_name": "Bodo", - "autonym": "बर’", - "family": "Sino-Tibetan", - "flores_path": "brx_Deva", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bs", - "speakers": 7594468, - "language_name": "Bosnian", - "autonym": "Bosanski", - "family": "Indo-European", - "flores_path": "bos_Latn", - "fleurs_tag": "bs_ba", - "commonvoice_hours": 0.0, - "commonvoice_locale": "bs", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bsc", - "speakers": 15264, - "language_name": "Bassari", - "autonym": "Bassari", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bss", - "speakers": 149823, - "language_name": "Akoose", - "autonym": "Akoose", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bto", - "speakers": 305707, - "language_name": "Rinconada Bikol", - "autonym": "Rinconada Bikol", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "btv", - "speakers": 78843, - "language_name": "Bateri", - "autonym": "Bateri", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bua", - "speakers": 311788, - "language_name": "Buriat", - "autonym": "Buriat", - "family": "Mongolic-Khitan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "bxr", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "buc", - "speakers": 44620, - "language_name": "Bushi", - "autonym": "Bushi", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bug", - "speakers": 4298211, - "language_name": "Buginese", - "autonym": "Buginese", - "family": "Austronesian", - "flores_path": "bug_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bum", - "speakers": 1276270, - "language_name": "Bulu", - "autonym": "Bulu", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 9.5, - "commonvoice_locale": "bum", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bvb", - "speakers": 66058, - "language_name": "Bube", - "autonym": "Bube", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "byn", - "speakers": 79056, - "language_name": "Blin", - "autonym": "Blin", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "byv", - "speakers": 305195, - "language_name": "Medumba", - "autonym": "Medumba", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 14.0, - "commonvoice_locale": "byv", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "bze", - "speakers": 166204, - "language_name": "Jenaama Bozo", - "autonym": "Jenaama Bozo", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ca", - "speakers": 8679139, - "language_name": "Catalan", - "autonym": "Català", - "family": "Indo-European", - "flores_path": "cat_Latn", - "fleurs_tag": "ca_es", - "commonvoice_hours": 2845.0, - "commonvoice_locale": "ca", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cad", - "speakers": 0, - "language_name": "Caddo", - "autonym": "Caddo", - "family": "Caddoan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cch", - "speakers": 44946, - "language_name": "Atsam", - "autonym": "Atsam", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ccp", - "speakers": 729137, - "language_name": "Chakma", - "autonym": "𑄌𑄋𑄴𑄟𑄳𑄦", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ce", - "speakers": 935365, - "language_name": "Chechen", - "autonym": "Нохчийн", - "family": "Nakh-Daghestanian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ceb", - "speakers": 26203440, - "language_name": "Cebuano", - "autonym": "Cebuano", - "family": "Austronesian", - "flores_path": "ceb_Latn", - "fleurs_tag": "ceb_ph", - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cgg", - "speakers": 2335662, - "language_name": "Chiga", - "autonym": "Rukiga", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ch", - "speakers": 46325, - "language_name": "Chamorro", - "autonym": "Chamorro", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "chk", - "speakers": 30731, - "language_name": "Chuukese", - "autonym": "Chuukese", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "chm", - "speakers": 524371, - "language_name": "Mari", - "autonym": "Mari", - "family": "Uralic", - "flores_path": "mhr_Cyrl", - "fleurs_tag": null, - "commonvoice_hours": 282.0, - "commonvoice_locale": "mhr", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cho", - "speakers": 10977, - "language_name": "Choctaw", - "autonym": "Chahta", - "family": "Muskogean", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "chp", - "speakers": 12816, - "language_name": "Chipewyan", - "autonym": "Chipewyan", - "family": "Athabaskan-Eyak-Tlingit", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "chr", - "speakers": 25613, - "language_name": "Cherokee", - "autonym": "Ꮳꮃꭹ", - "family": "Iroquoian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cic", - "speakers": 0, - "language_name": "Chickasaw", - "autonym": "Chikashshanompaʼ", - "family": "Muskogean", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cja", - "speakers": 270832, - "language_name": "Western Cham", - "autonym": "Western Cham", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cjm", - "speakers": 87862, - "language_name": "Eastern Cham", - "autonym": "Eastern Cham", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ckb", - "speakers": 11086549, - "language_name": "Central Kurdish", - "autonym": "کوردیی ناوەندی", - "family": "Indo-European", - "flores_path": "ckb_Arab", - "fleurs_tag": "ckb_iq", - "commonvoice_hours": 135.0, - "commonvoice_locale": "ckb", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "clc", - "speakers": 867, - "language_name": "Chilcotin", - "autonym": "Chilcotin", - "family": "Athabaskan-Eyak-Tlingit", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "co", - "speakers": 162836, - "language_name": "Corsican", - "autonym": "Corsu", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "co", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cps", - "speakers": 720595, - "language_name": "Capiznon", - "autonym": "Capiznon", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cr", - "speakers": 9047, - "language_name": "Cree", - "autonym": "Cree", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "crg", - "speakers": 678, - "language_name": "Michif", - "autonym": "Michif", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "crh", - "speakers": 245968, - "language_name": "Crimean Tatar", - "autonym": "Crimean Tatar", - "family": "Turkic", - "flores_path": "crh_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "crh", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "crk", - "speakers": 4146, - "language_name": "Plains Cree", - "autonym": "Plains Cree", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "crl", - "speakers": 377, - "language_name": "Northern East Cree", - "autonym": "Northern East Cree", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "crs", - "speakers": 94061, - "language_name": "Seselwa Creole French", - "autonym": "Seselwa Creole French", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cs", - "speakers": 13045532, - "language_name": "Czech", - "autonym": "Čeština", - "family": "Indo-European", - "flores_path": "ces_Latn", - "fleurs_tag": "cs_cz", - "commonvoice_hours": 74.0, - "commonvoice_locale": "cs", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "csb", - "speakers": 49767, - "language_name": "Kashubian", - "autonym": "Kashubian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "csw", - "speakers": 1809, - "language_name": "Swampy Cree", - "autonym": "ᓀᐦᐃᓇᐍᐏᐣ", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cu", - "speakers": 0, - "language_name": "Church Slavic", - "autonym": "Church Slavic", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cv", - "speakers": 1842386, - "language_name": "Chuvash", - "autonym": "Чӑваш", - "family": "Turkic", - "flores_path": "chv_Cyrl", - "fleurs_tag": null, - "commonvoice_hours": 27.0, - "commonvoice_locale": "cv", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "cy", - "speakers": 884910, - "language_name": "Welsh", - "autonym": "Cymraeg", - "family": "Indo-European", - "flores_path": "cym_Latn", - "fleurs_tag": "cy_gb", - "commonvoice_hours": 124.0, - "commonvoice_locale": "cy", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "da", - "speakers": 7072056, - "language_name": "Danish", - "autonym": "Dansk", - "family": "Indo-European", - "flores_path": "dan_Latn", - "fleurs_tag": "da_dk", - "commonvoice_hours": 13.0, - "commonvoice_locale": "da", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dak", - "speakers": 20832, - "language_name": "Dakota", - "autonym": "Dakota", - "family": "Siouan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dar", - "speakers": 368477, - "language_name": "Dargwa", - "autonym": "Dargwa", - "family": "Nakh-Daghestanian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "dar", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dav", - "speakers": 438929, - "language_name": "Taita", - "autonym": "Kitaita", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 9.3, - "commonvoice_locale": "dav", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dcc", - "speakers": 13128291, - "language_name": "Deccan", - "autonym": "Deccan", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6026058740561834, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "de", - "speakers": 136350226, - "language_name": "German", - "autonym": "Deutsch", - "family": "Indo-European", - "flores_path": "deu_Latn", - "fleurs_tag": "de_de", - "commonvoice_hours": 1360.0, - "commonvoice_locale": "de", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6666666666666666, - "model": 1.0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.48930936408255293, + "sentence_nr": 0 }, { - "bcp_47": "de", - "speakers": 136350226, - "language_name": "German", - "autonym": "Deutsch", - "family": "Indo-European", - "flores_path": "deu_Latn", - "fleurs_tag": "de_de", - "commonvoice_hours": 1360.0, - "commonvoice_locale": "de", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", "metric": "chrf", - "score": 0.9594656177914042, - "model": 1.0 - }, + "score": 0.699085629239476, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "de", - "speakers": 136350226, - "language_name": "German", - "autonym": "Deutsch", - "family": "Indo-European", - "flores_path": "deu_Latn", - "fleurs_tag": "de_de", - "commonvoice_hours": 1360.0, - "commonvoice_locale": "de", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.3207642359472324, - "model": 1.0 + "score": 0.3963410285961713, + "sentence_nr": 0 }, { - "bcp_47": "de", - "speakers": 136350226, - "language_name": "German", - "autonym": "Deutsch", - "family": "Indo-European", - "flores_path": "deu_Latn", - "fleurs_tag": "de_de", - "commonvoice_hours": 1360.0, - "commonvoice_locale": "de", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.49973270743869647, - "model": 1.0 - }, - { - "bcp_47": "den", - "speakers": 2299, - "language_name": "Slave", - "autonym": "Slave", - "family": "Athabaskan-Eyak-Tlingit", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dgr", - "speakers": 2111, - "language_name": "Dogrib", - "autonym": "Dogrib", - "family": "Athabaskan-Eyak-Tlingit", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dje", - "speakers": 3871308, - "language_name": "Zarma", - "autonym": "Zarmaciine", - "family": "Songhay", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dnj", - "speakers": 1099244, - "language_name": "Dan", - "autonym": "Dan", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "doi", - "speakers": 2652180, - "language_name": "Dogri", - "autonym": "डोगरी", - "family": "Indo-European", - "flores_path": "dgo_Deva", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dsb", - "speakers": 6974, - "language_name": "Lower Sorbian", - "autonym": "Dolnoserbšćina", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "dsb", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dtm", - "speakers": 215087, - "language_name": "Tomo Kan Dogon", - "autonym": "Tomo Kan Dogon", - "family": "Dogon", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dtp", - "speakers": 182852, - "language_name": "Central Dusun", - "autonym": "Central Dusun", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dty", - "speakers": 758198, - "language_name": "Dotyali", - "autonym": "Dotyali", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dua", - "speakers": 133176, - "language_name": "Duala", - "autonym": "Duálá", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 14.0, - "commonvoice_locale": "dua", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dv", - "speakers": 388044, - "language_name": "Divehi", - "autonym": "Divehi", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 39.0, - "commonvoice_locale": "dv", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dyo", - "speakers": 409146, - "language_name": "Jola-Fonyi", - "autonym": "Joola", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dyu", - "speakers": 6667328, - "language_name": "Dyula", - "autonym": "Dyula", - "family": "Mande", - "flores_path": "dyu_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.4, - "commonvoice_locale": "dyu", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "dz", - "speakers": 370341, - "language_name": "Dzongkha", - "autonym": "རྫོང་ཁ", - "family": "Bookkeeping", - "flores_path": "dzo_Tibt", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ebu", - "speakers": 802918, - "language_name": "Embu", - "autonym": "Kĩembu", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ee", - "speakers": 4690857, - "language_name": "Ewe", - "autonym": "Eʋegbe", - "family": "Atlantic-Congo", - "flores_path": "ewe_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ee", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "efi", - "speakers": 2996392, - "language_name": "Efik", - "autonym": "Efik", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "egl", - "speakers": 31201, - "language_name": "Emilian", - "autonym": "Emilian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "el", - "speakers": 12292242, - "language_name": "Greek", - "autonym": "Ελληνικά", - "family": "Indo-European", - "flores_path": "ell_Grek", - "fleurs_tag": "el_gr", - "commonvoice_hours": 20.0, - "commonvoice_locale": "el", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "en", - "speakers": 1636485840, - "language_name": "English", - "autonym": "English", - "family": "Indo-European", - "flores_path": "eng_Latn", - "fleurs_tag": "en_us", - "commonvoice_hours": 2653.0, - "commonvoice_locale": "en", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5777777777777778, - "model": 12.0 - }, - { - "bcp_47": "en", - "speakers": 1636485840, - "language_name": "English", - "autonym": "English", - "family": "Indo-European", - "flores_path": "eng_Latn", - "fleurs_tag": "en_us", - "commonvoice_hours": 2653.0, - "commonvoice_locale": "en", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.9180269549823046, - "model": 12.0 - }, - { - "bcp_47": "en", - "speakers": 1636485840, - "language_name": "English", - "autonym": "English", - "family": "Indo-European", - "flores_path": "eng_Latn", - "fleurs_tag": "en_us", - "commonvoice_hours": 2653.0, - "commonvoice_locale": "en", - "in_benchmark": true, - "task": "translation", - "metric": "bleu", - "score": 0.3703633711863608, - "model": 12.0 - }, - { - "bcp_47": "en", - "speakers": 1636485840, - "language_name": "English", - "autonym": "English", - "family": "Indo-European", - "flores_path": "eng_Latn", - "fleurs_tag": "en_us", - "commonvoice_hours": 2653.0, - "commonvoice_locale": "en", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4880916692700535, - "model": 12.0 - }, - { - "bcp_47": "eo", - "speakers": 301, - "language_name": "Esperanto", - "autonym": "Esperanto", - "family": "Artificial Language", - "flores_path": "epo_Latn", - "fleurs_tag": null, - "commonvoice_hours": 1436.0, - "commonvoice_locale": "eo", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.613166190285915, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "es", - "speakers": 493528077, - "language_name": "Spanish", - "autonym": "Español", - "family": "Indo-European", - "flores_path": "spa_Latn", - "fleurs_tag": "es_419", - "commonvoice_hours": 446.0, - "commonvoice_locale": "es", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "model": 1.0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.44294247711132617, + "sentence_nr": 0 }, { - "bcp_47": "es", - "speakers": 493528077, - "language_name": "Spanish", - "autonym": "Español", - "family": "Indo-European", - "flores_path": "spa_Latn", - "fleurs_tag": "es_419", - "commonvoice_hours": 446.0, - "commonvoice_locale": "es", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", "metric": "chrf", - "score": 0.9569342865902168, - "model": 1.0 - }, + "score": 0.5915660675216782, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "es", - "speakers": 493528077, - "language_name": "Spanish", - "autonym": "Español", - "family": "Indo-European", - "flores_path": "spa_Latn", - "fleurs_tag": "es_419", - "commonvoice_hours": 446.0, - "commonvoice_locale": "es", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.283646935447629, - "model": 1.0 + "score": 0.3756985486608933, + "sentence_nr": 0 }, { - "bcp_47": "es", - "speakers": 493528077, - "language_name": "Spanish", - "autonym": "Español", - "family": "Indo-European", - "flores_path": "spa_Latn", - "fleurs_tag": "es_419", - "commonvoice_hours": 446.0, - "commonvoice_locale": "es", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.46056393670415496, - "model": 1.0 - }, - { - "bcp_47": "esu", - "speakers": 20956, - "language_name": "Central Yupik", - "autonym": "Central Yupik", - "family": "Eskimo-Aleut", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 7.6, - "commonvoice_locale": "esu", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "et", - "speakers": 878449, - "language_name": "Estonian", - "autonym": "Eesti", - "family": "Uralic", - "flores_path": "ekk_Latn", - "fleurs_tag": "et_ee", - "commonvoice_hours": 58.0, - "commonvoice_locale": "et", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "eu", - "speakers": 1088519, - "language_name": "Basque", - "autonym": "Euskara", - "family": null, - "flores_path": "eus_Latn", - "fleurs_tag": null, - "commonvoice_hours": 336.0, - "commonvoice_locale": "eu", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ewo", - "speakers": 860095, - "language_name": "Ewondo", - "autonym": "Ewondo", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 19.0, - "commonvoice_locale": "ewo", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ext", - "speakers": 245077, - "language_name": "Extremaduran", - "autonym": "Extremaduran", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5991443770283833, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "fa", - "speakers": 84710459, - "language_name": "Persian", - "autonym": "فارسی", - "family": "Indo-European", - "flores_path": "pes_Arab", - "fleurs_tag": "fa_ir", - "commonvoice_hours": 370.0, - "commonvoice_locale": "fa", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "model": 1.0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5009456904181451, + "sentence_nr": 0 }, { - "bcp_47": "fa", - "speakers": 84710459, - "language_name": "Persian", - "autonym": "فارسی", - "family": "Indo-European", - "flores_path": "pes_Arab", - "fleurs_tag": "fa_ir", - "commonvoice_hours": 370.0, - "commonvoice_locale": "fa", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", "metric": "chrf", - "score": 0.9414698824984596, - "model": 1.0 - }, + "score": 0.6893719644090858, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "fa", - "speakers": 84710459, - "language_name": "Persian", - "autonym": "فارسی", - "family": "Indo-European", - "flores_path": "pes_Arab", - "fleurs_tag": "fa_ir", - "commonvoice_hours": 370.0, - "commonvoice_locale": "fa", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.26232478733341374, - "model": 1.0 + "score": 0.18273944860385094, + "sentence_nr": 0 }, { - "bcp_47": "fa", - "speakers": 84710459, - "language_name": "Persian", - "autonym": "فارسی", - "family": "Indo-European", - "flores_path": "pes_Arab", - "fleurs_tag": "fa_ir", - "commonvoice_hours": 370.0, - "commonvoice_locale": "fa", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.44641220608314985, - "model": 1.0 - }, - { - "bcp_47": "fan", - "speakers": 426451, - "language_name": "Fang", - "autonym": "Fang", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 9.3, - "commonvoice_locale": "fan", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fbl", - "speakers": 2511163, - "language_name": "West Albay Bikol", - "autonym": "West Albay Bikol", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ff", - "speakers": 7788904, - "language_name": "Fula", - "autonym": "Pulaar", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": "ff_sn", - "commonvoice_hours": 0.0, - "commonvoice_locale": "ff", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ffm", - "speakers": 1505612, - "language_name": "Maasina Fulfulde", - "autonym": "Maasina Fulfulde", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fi", - "speakers": 5736842, - "language_name": "Finnish", - "autonym": "Suomi", - "family": "Uralic", - "flores_path": "fin_Latn", - "fleurs_tag": "fi_fi", - "commonvoice_hours": 15.0, - "commonvoice_locale": "fi", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fia", - "speakers": 378161, - "language_name": "Nobiin", - "autonym": "Nobiin", - "family": "Nubian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.44261865187418153, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "fil", - "speakers": 67471096, - "language_name": "Filipino", - "autonym": "Filipino", - "family": "Austronesian", - "flores_path": "fil_Latn", - "fleurs_tag": "fil_ph", - "commonvoice_hours": 0.0, - "commonvoice_locale": "tl", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "model": 1.0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2153742037697241, + "sentence_nr": 0 }, { - "bcp_47": "fil", - "speakers": 67471096, - "language_name": "Filipino", - "autonym": "Filipino", - "family": "Austronesian", - "flores_path": "fil_Latn", - "fleurs_tag": "fil_ph", - "commonvoice_hours": 0.0, - "commonvoice_locale": "tl", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", "metric": "chrf", - "score": 0.9402707475255596, - "model": 1.0 - }, + "score": 0.4581737688885401, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "fil", - "speakers": 67471096, - "language_name": "Filipino", - "autonym": "Filipino", - "family": "Austronesian", - "flores_path": "fil_Latn", - "fleurs_tag": "fil_ph", - "commonvoice_hours": 0.0, - "commonvoice_locale": "tl", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.29097818784870333, - "model": 1.0 + "score": 0.3372953649368346, + "sentence_nr": 0 }, { - "bcp_47": "fil", - "speakers": 67471096, - "language_name": "Filipino", - "autonym": "Filipino", - "family": "Austronesian", - "flores_path": "fil_Latn", - "fleurs_tag": "fil_ph", - "commonvoice_hours": 0.0, - "commonvoice_locale": "tl", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.44978114149245985, - "model": 1.0 - }, - { - "bcp_47": "fit", - "speakers": 56114, - "language_name": "Tornedalen Finnish", - "autonym": "Tornedalen Finnish", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fj", - "speakers": 365030, - "language_name": "Fijian", - "autonym": "Fijian", - "family": "Austronesian", - "flores_path": "fij_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fo", - "speakers": 71351, - "language_name": "Faroese", - "autonym": "Føroyskt", - "family": "Indo-European", - "flores_path": "fao_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "fo", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fon", - "speakers": 3216150, - "language_name": "Fon", - "autonym": "Fon", - "family": "Atlantic-Congo", - "flores_path": "fon_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5482505380106469, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "fr", - "speakers": 278611507, - "language_name": "French", - "autonym": "Français", - "family": "Indo-European", - "flores_path": "fra_Latn", - "fleurs_tag": "fr_fr", - "commonvoice_hours": 1053.0, - "commonvoice_locale": "fr", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "model": 1.0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.28528905353056333, + "sentence_nr": 0 }, { - "bcp_47": "fr", - "speakers": 278611507, - "language_name": "French", - "autonym": "Français", - "family": "Indo-European", - "flores_path": "fra_Latn", - "fleurs_tag": "fr_fr", - "commonvoice_hours": 1053.0, - "commonvoice_locale": "fr", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", "metric": "chrf", - "score": 0.9815706066541411, - "model": 1.0 - }, + "score": 0.4885812318466243, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "fr", - "speakers": 278611507, - "language_name": "French", - "autonym": "Français", - "family": "Indo-European", - "flores_path": "fra_Latn", - "fleurs_tag": "fr_fr", - "commonvoice_hours": 1053.0, - "commonvoice_locale": "fr", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.3193235920661593, - "model": 1.0 + "score": 0.2935204022158406, + "sentence_nr": 0 }, { - "bcp_47": "fr", - "speakers": 278611507, - "language_name": "French", - "autonym": "Français", - "family": "Indo-European", - "flores_path": "fra_Latn", - "fleurs_tag": "fr_fr", - "commonvoice_hours": 1053.0, - "commonvoice_locale": "fr", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4875691290722964, - "model": 1.0 - }, - { - "bcp_47": "frc", - "speakers": 27942, - "language_name": "Cajun French", - "autonym": "Cajun French", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "frp", - "speakers": 63777, - "language_name": "Arpitan", - "autonym": "Arpitan", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "frr", - "speakers": 9619, - "language_name": "Northern Frisian", - "autonym": "Nordfriisk", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "frs", - "speakers": 2004, - "language_name": "Eastern Frisian", - "autonym": "Eastern Frisian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fud", - "speakers": 4756, - "language_name": "East Futuna", - "autonym": "East Futuna", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fuq", - "speakers": 1594068, - "language_name": "Central-Eastern Niger Fulfulde", - "autonym": "Central-Eastern Niger Fulfulde", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fur", - "speakers": 37442, - "language_name": "Friulian", - "autonym": "Furlan", - "family": "Indo-European", - "flores_path": "fur_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fuv", - "speakers": 14339876, - "language_name": "Nigerian Fulfulde", - "autonym": "Nigerian Fulfulde", - "family": "Atlantic-Congo", - "flores_path": "fuv_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fvr", - "speakers": 1230163, - "language_name": "Fur", - "autonym": "Fur", - "family": "Furan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "fy", - "speakers": 743057, - "language_name": "Western Frisian", - "autonym": "Frysk", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 66.0, - "commonvoice_locale": "fy-NL", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ga", - "speakers": 1237487, - "language_name": "Irish", - "autonym": "Gaeilge", - "family": "Indo-European", - "flores_path": "gle_Latn", - "fleurs_tag": "ga_ie", - "commonvoice_hours": 6.0, - "commonvoice_locale": "ga-IE", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gaa", - "speakers": 821526, - "language_name": "Ga", - "autonym": "Gã", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gag", - "speakers": 111028, - "language_name": "Gagauz", - "autonym": "Gagauz", - "family": "Turkic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gan", - "speakers": 23698340, - "language_name": "Gan Chinese", - "autonym": "Gan Chinese", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gay", - "speakers": 320431, - "language_name": "Gayo", - "autonym": "Gayo", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gbm", - "speakers": 3580443, - "language_name": "Garhwali", - "autonym": "Garhwali", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gbz", - "speakers": 7983, - "language_name": "Zoroastrian Dari", - "autonym": "Zoroastrian Dari", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gcr", - "speakers": 51872, - "language_name": "Guianese Creole French", - "autonym": "Guianese Creole French", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gd", - "speakers": 72337, - "language_name": "Scottish Gaelic", - "autonym": "Gàidhlig", - "family": "Indo-European", - "flores_path": "gla_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gez", - "speakers": 0, - "language_name": "Geez", - "autonym": "Geez", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gil", - "speakers": 67078, - "language_name": "Gilbertese", - "autonym": "Gilbertese", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gjk", - "speakers": 256851, - "language_name": "Kachi Koli", - "autonym": "Kachi Koli", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gju", - "speakers": 467002, - "language_name": "Gujari", - "autonym": "Gujari", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 11.0, - "commonvoice_locale": "gju", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gl", - "speakers": 3515530, - "language_name": "Galician", - "autonym": "Galego", - "family": "Indo-European", - "flores_path": "glg_Latn", - "fleurs_tag": "gl_es", - "commonvoice_hours": 111.0, - "commonvoice_locale": "gl", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "glk", - "speakers": 3906472, - "language_name": "Gilaki", - "autonym": "Gilaki", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gn", - "speakers": 5827107, - "language_name": "Guarani", - "autonym": "Avañe’Ẽ", - "family": "Tupian", - "flores_path": "gug_Latn", - "fleurs_tag": null, - "commonvoice_hours": 3.7, - "commonvoice_locale": "gn", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gom", - "speakers": 4243488, - "language_name": "Goan Konkani", - "autonym": "Goan Konkani", - "family": "Indo-European", - "flores_path": "gom_Deva", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "gom", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gon", - "speakers": 3182616, - "language_name": "Gondi", - "autonym": "Gondi", - "family": "Dravidian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gor", - "speakers": 1094807, - "language_name": "Gorontalo", - "autonym": "Gorontalo", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gos", - "speakers": 622094, - "language_name": "Gronings", - "autonym": "Gronings", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "gos", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "grt", - "speakers": 821563, - "language_name": "Garo", - "autonym": "Garo", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gsw", - "speakers": 7956952, - "language_name": "Swiss German", - "autonym": "Schwiizertüütsch", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "gsw", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4867597973247361, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "gu", - "speakers": 61721799, - "language_name": "Gujarati", - "autonym": "ગુજરાતી", - "family": "Indo-European", - "flores_path": "guj_Gujr", - "fleurs_tag": "gu_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "gu-IN", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "model": 1.0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.2929684584911775, + "sentence_nr": 0 }, { - "bcp_47": "gu", - "speakers": 61721799, - "language_name": "Gujarati", - "autonym": "ગુજરાતી", - "family": "Indo-European", - "flores_path": "guj_Gujr", - "fleurs_tag": "gu_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "gu-IN", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", "metric": "chrf", - "score": 0.938086045460355, - "model": 1.0 - }, + "score": 0.5038324436049059, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "gu", - "speakers": 61721799, - "language_name": "Gujarati", - "autonym": "ગુજરાતી", - "family": "Indo-European", - "flores_path": "guj_Gujr", - "fleurs_tag": "gu_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "gu-IN", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.26442484966880464, - "model": 1.0 + "score": 0.4034224234291925, + "sentence_nr": 0 }, { - "bcp_47": "gu", - "speakers": 61721799, - "language_name": "Gujarati", - "autonym": "ગુજરાતી", - "family": "Indo-European", - "flores_path": "guj_Gujr", - "fleurs_tag": "gu_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "gu-IN", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.44452182973195975, - "model": 1.0 - }, - { - "bcp_47": "gub", - "speakers": 17784, - "language_name": "Guajajára", - "autonym": "Guajajára", - "family": "Tupian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "guc", - "speakers": 132529, - "language_name": "Wayuu", - "autonym": "Wayuu", - "family": "Arawakan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "guc", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gur", - "speakers": 1026907, - "language_name": "Frafra", - "autonym": "Frafra", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "guz", - "speakers": 2622867, - "language_name": "Gusii", - "autonym": "Ekegusii", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gv", - "speakers": 1719, - "language_name": "Manx", - "autonym": "Gaelg", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 11.0, - "commonvoice_locale": "gv", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gvr", - "speakers": 87951, - "language_name": "Gurung", - "autonym": "Gurung", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "gwi", - "speakers": 302, - "language_name": "Gwichʼin", - "autonym": "GwichʼIn", - "family": "Athabaskan-Eyak-Tlingit", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ha", - "speakers": 40411882, - "language_name": "Hausa", - "autonym": "Hausa", - "family": "Afro-Asiatic", - "flores_path": "hau_Latn", - "fleurs_tag": "ha_ng", - "commonvoice_hours": 4.1, - "commonvoice_locale": "ha", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hak", - "speakers": 32062460, - "language_name": "Hakka Chinese", - "autonym": "Hakka Chinese", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "haw", - "speakers": 29605, - "language_name": "Hawaiian", - "autonym": "ʻŌlelo HawaiʻI", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "haz", - "speakers": 2161984, - "language_name": "Hazaragi", - "autonym": "Hazaragi", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "he", - "speakers": 8675480, - "language_name": "Hebrew", - "autonym": "עברית", - "family": "Afro-Asiatic", - "flores_path": "heb_Hebr", - "fleurs_tag": "he_il", - "commonvoice_hours": 1.1, - "commonvoice_locale": "he", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hi", - "speakers": 546882144, - "language_name": "Hindi", - "autonym": "हिन्दी", - "family": "Indo-European", - "flores_path": "hin_Deva", - "fleurs_tag": "hi_in", - "commonvoice_hours": 16.0, - "commonvoice_locale": "hi-IN", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "model": 1.0 - }, - { - "bcp_47": "hi", - "speakers": 546882144, - "language_name": "Hindi", - "autonym": "हिन्दी", - "family": "Indo-European", - "flores_path": "hin_Deva", - "fleurs_tag": "hi_in", - "commonvoice_hours": 16.0, - "commonvoice_locale": "hi-IN", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", "metric": "chrf", - "score": 0.9428185393832219, - "model": 1.0 - }, + "score": 0.5736798834726872, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "hi", - "speakers": 546882144, - "language_name": "Hindi", - "autonym": "हिन्दी", - "family": "Indo-European", - "flores_path": "hin_Deva", - "fleurs_tag": "hi_in", - "commonvoice_hours": 16.0, - "commonvoice_locale": "hi-IN", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.31956422674397006, - "model": 1.0 + "score": 0.1077205146963877, + "sentence_nr": 0 }, { - "bcp_47": "hi", - "speakers": 546882144, - "language_name": "Hindi", - "autonym": "हिन्दी", - "family": "Indo-European", - "flores_path": "hin_Deva", - "fleurs_tag": "hi_in", - "commonvoice_hours": 16.0, - "commonvoice_locale": "hi-IN", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4896277852320754, - "model": 1.0 - }, - { - "bcp_47": "hif", - "speakers": 383749, - "language_name": "Fiji Hindi", - "autonym": "Fiji Hindi", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hil", - "speakers": 9171204, - "language_name": "Hiligaynon", - "autonym": "Ilonggo", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "hil", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hnd", - "speakers": 957354, - "language_name": "Southern Hindko", - "autonym": "Southern Hindko", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hne", - "speakers": 14586990, - "language_name": "Chhattisgarhi", - "autonym": "Chhattisgarhi", - "family": "Indo-European", - "flores_path": "hne_Deva", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hnj", - "speakers": 781687, - "language_name": "Hmong Njua", - "autonym": "𞄀𞄄𞄰𞄩𞄍𞄜𞄰", - "family": "Hmong-Mien", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hnn", - "speakers": 17469, - "language_name": "Hanunoo", - "autonym": "Hanunoo", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hno", - "speakers": 3969517, - "language_name": "Northern Hindko", - "autonym": "Northern Hindko", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 11.0, - "commonvoice_locale": "hno", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ho", - "speakers": 152449, - "language_name": "Hiri Motu", - "autonym": "Hiri Motu", - "family": "Pidgin", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hoc", - "speakers": 1312829, - "language_name": "Ho", - "autonym": "Ho", - "family": "Austroasiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hoj", - "speakers": 1087394, - "language_name": "Hadothi", - "autonym": "Hadothi", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hr", - "speakers": 6813164, - "language_name": "Croatian", - "autonym": "Hrvatski", - "family": "Indo-European", - "flores_path": "hrv_Latn", - "fleurs_tag": "hr_hr", - "commonvoice_hours": 0.0, - "commonvoice_locale": "hr", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hsb", - "speakers": 12826, - "language_name": "Upper Sorbian", - "autonym": "Hornjoserbšćina", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 2.9, - "commonvoice_locale": "hsb", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hsn", - "speakers": 40426580, - "language_name": "Xiang Chinese", - "autonym": "Xiang Chinese", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ht", - "speakers": 8964918, - "language_name": "Haitian Creole", - "autonym": "Haitian Creole", - "family": "Indo-European", - "flores_path": "hat_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ht", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hu", - "speakers": 12443430, - "language_name": "Hungarian", - "autonym": "Magyar", - "family": "Uralic", - "flores_path": "hun_Latn", - "fleurs_tag": "hu_hu", - "commonvoice_hours": 92.0, - "commonvoice_locale": "hu", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hur", - "speakers": 716, - "language_name": "Halkomelem", - "autonym": "Halkomelem", - "family": "Salishan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hy", - "speakers": 5317273, - "language_name": "Armenian", - "autonym": "Հայերեն", - "family": "Indo-European", - "flores_path": "hye_Armn", - "fleurs_tag": "hy_am", - "commonvoice_hours": 31.0, - "commonvoice_locale": "hy-AM", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "hz", - "speakers": 239336, - "language_name": "Herero", - "autonym": "Herero", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ia", - "speakers": 136, - "language_name": "Interlingua", - "autonym": "Interlingua", - "family": "Artificial Language", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 14.0, - "commonvoice_locale": "ia", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "iba", - "speakers": 816302, - "language_name": "Iban", - "autonym": "Iban", - "family": "Bookkeeping", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ibb", - "speakers": 2996392, - "language_name": "Ibibio", - "autonym": "Ibibio", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 2.6, - "commonvoice_locale": "ibb", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.428338145564396, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "id", - "speakers": 171207687, - "language_name": "Indonesian", - "autonym": "Bahasa Indonesia", - "family": "Austronesian", - "flores_path": "ind_Latn", - "fleurs_tag": "id_id", - "commonvoice_hours": 33.0, - "commonvoice_locale": "id", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "model": 1.0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.22327767951697297, + "sentence_nr": 0 }, { - "bcp_47": "id", - "speakers": 171207687, - "language_name": "Indonesian", - "autonym": "Bahasa Indonesia", - "family": "Austronesian", - "flores_path": "ind_Latn", - "fleurs_tag": "id_id", - "commonvoice_hours": 33.0, - "commonvoice_locale": "id", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", "metric": "chrf", - "score": 0.9598314474300775, - "model": 1.0 - }, + "score": 0.4063556880747369, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "id", - "speakers": 171207687, - "language_name": "Indonesian", - "autonym": "Bahasa Indonesia", - "family": "Austronesian", - "flores_path": "ind_Latn", - "fleurs_tag": "id_id", - "commonvoice_hours": 33.0, - "commonvoice_locale": "id", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.29354203311552335, - "model": 1.0 + "score": 0.2572733200413211, + "sentence_nr": 0 }, { - "bcp_47": "id", - "speakers": 171207687, - "language_name": "Indonesian", - "autonym": "Bahasa Indonesia", - "family": "Austronesian", - "flores_path": "ind_Latn", - "fleurs_tag": "id_id", - "commonvoice_hours": 33.0, - "commonvoice_locale": "id", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4723493656022861, - "model": 1.0 - }, - { - "bcp_47": "ie", - "speakers": 1, - "language_name": "Interlingue", - "autonym": "Interlingue", - "family": "Artificial Language", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ie", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ife", - "speakers": 111910, - "language_name": "Ifè", - "autonym": "Ifè", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ig", - "speakers": 27823640, - "language_name": "Igbo", - "autonym": "Igbo", - "family": "Atlantic-Congo", - "flores_path": "ibo_Latn", - "fleurs_tag": "ig_ng", - "commonvoice_hours": 0.0, - "commonvoice_locale": "ig", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ii", - "speakers": 8364120, - "language_name": "Sichuan Yi", - "autonym": "ꆈꌠꉙ", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ik", - "speakers": 7983, - "language_name": "Inupiaq", - "autonym": "Inupiaq", - "family": "Eskimo-Aleut", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 7.2, - "commonvoice_locale": "ipk", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ilo", - "speakers": 10481376, - "language_name": "Iloko", - "autonym": "Ilokano", - "family": "Austronesian", - "flores_path": "ilo_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "inh", - "speakers": 226755, - "language_name": "Ingush", - "autonym": "Ingush", - "family": "Nakh-Daghestanian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "io", - "speakers": 0, - "language_name": "Ido", - "autonym": "Ido", - "family": "Artificial Language", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "is", - "speakers": 350734, - "language_name": "Icelandic", - "autonym": "Íslenska", - "family": "Indo-European", - "flores_path": "isl_Latn", - "fleurs_tag": "is_is", - "commonvoice_hours": 0.1, - "commonvoice_locale": "is", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.4520014138562526, + "sentence_nr": 0 + } + ], + [ { + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", - "speakers": 70247060, - "language_name": "Italian", - "autonym": "Italiano", - "family": "Indo-European", - "flores_path": "ita_Latn", - "fleurs_tag": "it_it", - "commonvoice_hours": 362.0, - "commonvoice_locale": "it", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6666666666666666, - "model": 1.0 + "task": "translation", + "metric": "bleu", + "score": 0.40311197004738203, + "sentence_nr": 0 }, { + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", - "speakers": 70247060, - "language_name": "Italian", - "autonym": "Italiano", - "family": "Indo-European", - "flores_path": "ita_Latn", - "fleurs_tag": "it_it", - "commonvoice_hours": 362.0, - "commonvoice_locale": "it", - "in_benchmark": true, - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.9439490413212892, - "model": 1.0 - }, + "score": 0.5788525108956781, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "it", - "speakers": 70247060, - "language_name": "Italian", - "autonym": "Italiano", - "family": "Indo-European", - "flores_path": "ita_Latn", - "fleurs_tag": "it_it", - "commonvoice_hours": 362.0, - "commonvoice_locale": "it", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.28142583904826096, - "model": 1.0 + "score": 0.45313578977486535, + "sentence_nr": 0 }, { - "bcp_47": "it", - "speakers": 70247060, - "language_name": "Italian", - "autonym": "Italiano", - "family": "Indo-European", - "flores_path": "ita_Latn", - "fleurs_tag": "it_it", - "commonvoice_hours": 362.0, - "commonvoice_locale": "it", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4707591889357925, - "model": 1.0 - }, - { - "bcp_47": "iu", - "speakers": 90466, - "language_name": "Inuktitut", - "autonym": "Inuktitut", - "family": "Eskimo-Aleut", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "izh", - "speakers": 142, - "language_name": "Ingrian", - "autonym": "Ingrian", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "izh", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.6160993561903745, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "ja", - "speakers": 119729026, - "language_name": "Japanese", - "autonym": "日本語", - "family": "Japonic", - "flores_path": "jpn_Jpan", - "fleurs_tag": "ja_jp", - "commonvoice_hours": 222.0, - "commonvoice_locale": "ja", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "model": 1.0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.2651736858432996, + "sentence_nr": 0 }, { - "bcp_47": "ja", - "speakers": 119729026, - "language_name": "Japanese", - "autonym": "日本語", - "family": "Japonic", - "flores_path": "jpn_Jpan", - "fleurs_tag": "ja_jp", - "commonvoice_hours": 222.0, - "commonvoice_locale": "ja", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", "metric": "chrf", - "score": 0.9371132855221468, - "model": 1.0 - }, + "score": 0.4491383344282561, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "ja", - "speakers": 119729026, - "language_name": "Japanese", - "autonym": "日本語", - "family": "Japonic", - "flores_path": "jpn_Jpan", - "fleurs_tag": "ja_jp", - "commonvoice_hours": 222.0, - "commonvoice_locale": "ja", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.2723410893967824, - "model": 1.0 + "score": 0.34545319957597864, + "sentence_nr": 0 }, { - "bcp_47": "ja", - "speakers": 119729026, - "language_name": "Japanese", - "autonym": "日本語", - "family": "Japonic", - "flores_path": "jpn_Jpan", - "fleurs_tag": "ja_jp", - "commonvoice_hours": 222.0, - "commonvoice_locale": "ja", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.43929453749366865, - "model": 1.0 - }, - { - "bcp_47": "jam", - "speakers": 2668142, - "language_name": "Jamaican Creole English", - "autonym": "Jamaican Creole English", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "jbo", - "speakers": 0, - "language_name": "Lojban", - "autonym": "La .Lojban.", - "family": "Artificial Language", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "jbo", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "jgo", - "speakers": 94333, - "language_name": "Ngomba", - "autonym": "Ndaꞌa", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "jmc", - "speakers": 433291, - "language_name": "Machame", - "autonym": "Kimachame", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "jml", - "speakers": 970493, - "language_name": "Jumli", - "autonym": "Jumli", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "jut", - "speakers": 0, - "language_name": "Jutish", - "autonym": "Jutish", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5727052860304503, + "sentence_nr": 0 + } + ], + [ { - "bcp_47": "jv", - "speakers": 91180665, - "language_name": "Javanese", - "autonym": "Jawa", - "family": "Austronesian", - "flores_path": "jav_Latn", - "fleurs_tag": "jv_id", - "commonvoice_hours": 0.0, - "commonvoice_locale": "jv", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "model": 1.0 + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15815751066481462, + "sentence_nr": 1 }, { - "bcp_47": "jv", - "speakers": 91180665, - "language_name": "Javanese", - "autonym": "Jawa", - "family": "Austronesian", - "flores_path": "jav_Latn", - "fleurs_tag": "jv_id", - "commonvoice_hours": 0.0, - "commonvoice_locale": "jv", - "in_benchmark": true, - "task": "language_modeling", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9298143194922116, - "model": 1.0 - }, + "score": 0.5152611872266766, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "jv", - "speakers": 91180665, - "language_name": "Javanese", - "autonym": "Jawa", - "family": "Austronesian", - "flores_path": "jav_Latn", - "fleurs_tag": "jv_id", - "commonvoice_hours": 0.0, - "commonvoice_locale": "jv", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.23729186537968905, - "model": 1.0 + "score": 0.12903696060775005, + "sentence_nr": 1 }, { - "bcp_47": "jv", - "speakers": 91180665, - "language_name": "Javanese", - "autonym": "Jawa", - "family": "Austronesian", - "flores_path": "jav_Latn", - "fleurs_tag": "jv_id", - "commonvoice_hours": 0.0, - "commonvoice_locale": "jv", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4198940727847352, - "model": 1.0 - }, - { - "bcp_47": "ka", - "speakers": 3543646, - "language_name": "Georgian", - "autonym": "ქართული", - "family": "Kartvelian", - "flores_path": "kat_Geor", - "fleurs_tag": "ka_ge", - "commonvoice_hours": 158.0, - "commonvoice_locale": "ka", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kaa", - "speakers": 489046, - "language_name": "Kara-Kalpak", - "autonym": "Kara-Kalpak", - "family": "Turkic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "kaa", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kab", - "speakers": 3351886, - "language_name": "Kabyle", - "autonym": "Taqbaylit", - "family": "Afro-Asiatic", - "flores_path": "kab_Latn", - "fleurs_tag": null, - "commonvoice_hours": 571.0, - "commonvoice_locale": "kab", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kac", - "speakers": 962032, - "language_name": "Kachin", - "autonym": "Kachin", - "family": "Sino-Tibetan", - "flores_path": "kac_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kaj", - "speakers": 449459, - "language_name": "Jju", - "autonym": "Kaje", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kam", - "speakers": 4068120, - "language_name": "Kamba", - "autonym": "Kikamba", - "family": "Atlantic-Congo", - "flores_path": "kam_Latn", - "fleurs_tag": "kam_ke", - "commonvoice_hours": 0.0, - "commonvoice_locale": "kam", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kao", - "speakers": 195534, - "language_name": "Xaasongaxango", - "autonym": "Xaasongaxango", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kbd", - "speakers": 1070873, - "language_name": "Kabardian", - "autonym": "Kabardian", - "family": "Abkhaz-Adyge", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 19.0, - "commonvoice_locale": "kbd", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kcg", - "speakers": 199046, - "language_name": "Tyap", - "autonym": "Katab", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kck", - "speakers": 770954, - "language_name": "Kalanga", - "autonym": "Kalanga", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kde", - "speakers": 1463820, - "language_name": "Makonde", - "autonym": "Chimakonde", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kdt", - "speakers": 421207, - "language_name": "Kuy", - "autonym": "Kuy", - "family": "Austroasiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kea", - "speakers": 530762, - "language_name": "Kabuverdianu", - "autonym": "Kabuverdianu", - "family": "Indo-European", - "flores_path": "kea_Latn", - "fleurs_tag": "kea_cv", - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ken", - "speakers": 69362, - "language_name": "Kenyang", - "autonym": "Kɛnyaŋ", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kfo", - "speakers": 63207, - "language_name": "Koro", - "autonym": "Koro", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kfr", - "speakers": 994568, - "language_name": "Kachhi", - "autonym": "Kachhi", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kfy", - "speakers": 2917398, - "language_name": "Kumaoni", - "autonym": "Kumaoni", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kg", - "speakers": 1526700, - "language_name": "Kongo", - "autonym": "Kongo", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kge", - "speakers": 854483, - "language_name": "Komering", - "autonym": "Komering", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kgp", - "speakers": 50812, - "language_name": "Kaingang", - "autonym": "Kanhgág", - "family": "Nuclear-Macro-Je", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kha", - "speakers": 1060872, - "language_name": "Khasi", - "autonym": "Khasi", - "family": "Austroasiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "khb", - "speakers": 264864, - "language_name": "Lü", - "autonym": "Lü", - "family": "Tai-Kadai", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "khn", - "speakers": 1989135, - "language_name": "Khandesi", - "autonym": "Khandesi", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "khq", - "speakers": 332408, - "language_name": "Koyra Chiini", - "autonym": "Koyra Ciini", - "family": "Songhay", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kht", - "speakers": 13527, - "language_name": "Khamti", - "autonym": "Khamti", - "family": "Tai-Kadai", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "khw", - "speakers": 350252, - "language_name": "Khowar", - "autonym": "Khowar", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 18.0, - "commonvoice_locale": "khw", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ki", - "speakers": 9099743, - "language_name": "Kikuyu", - "autonym": "Gikuyu", - "family": "Atlantic-Congo", - "flores_path": "kik_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ki", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kiu", - "speakers": 155833, - "language_name": "Kirmanjki", - "autonym": "Kirmanjki", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kj", - "speakers": 920524, - "language_name": "Kuanyama", - "autonym": "Kuanyama", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kjg", - "speakers": 431949, - "language_name": "Khmu", - "autonym": "Khmu", - "family": "Austroasiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kk", - "speakers": 13637392, - "language_name": "Kazakh", - "autonym": "Қазақ Тілі", - "family": "Turkic", - "flores_path": "kaz_Cyrl", - "fleurs_tag": "kk_kz", - "commonvoice_hours": 2.1, - "commonvoice_locale": "kk", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kkj", - "speakers": 149823, - "language_name": "Kako", - "autonym": "Kakɔ", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kl", - "speakers": 55440, - "language_name": "Kalaallisut", - "autonym": "Kalaallisut", - "family": "Eskimo-Aleut", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kln", - "speakers": 4068120, - "language_name": "Kalenjin", - "autonym": "Kalenjin", - "family": "Nilotic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 43.0, - "commonvoice_locale": "kln", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "km", - "speakers": 15065030, - "language_name": "Khmer", - "autonym": "ខ្មែរ", - "family": "Austroasiatic", - "flores_path": "khm_Khmr", - "fleurs_tag": "km_kh", - "commonvoice_hours": 0.0, - "commonvoice_locale": "km", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kmb", - "speakers": 8130575, - "language_name": "Kimbundu", - "autonym": "Kimbundu", - "family": "Atlantic-Congo", - "flores_path": "kmb_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kn", - "speakers": 49065330, - "language_name": "Kannada", - "autonym": "ಕನ್ನಡ", - "family": "Dravidian", - "flores_path": "kan_Knda", - "fleurs_tag": "kn_in", - "commonvoice_hours": 0.0, - "commonvoice_locale": "kn", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "knf", - "speakers": 83151, - "language_name": "Mankanya", - "autonym": "Mankanya", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.456225988032654, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "ko", - "speakers": 78357046, - "language_name": "Korean", - "autonym": "한국어", - "family": "Koreanic", - "flores_path": "kor_Hang", - "fleurs_tag": "ko_kr", - "commonvoice_hours": 1.7, - "commonvoice_locale": "ko", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "model": 1.0 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "bcp_47": "ko", - "speakers": 78357046, - "language_name": "Korean", - "autonym": "한국어", - "family": "Koreanic", - "flores_path": "kor_Hang", - "fleurs_tag": "ko_kr", - "commonvoice_hours": 1.7, - "commonvoice_locale": "ko", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9355445912073929, - "model": 1.0 - }, + "score": 0.024459391267874976, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "ko", - "speakers": 78357046, - "language_name": "Korean", - "autonym": "한국어", - "family": "Koreanic", - "flores_path": "kor_Hang", - "fleurs_tag": "ko_kr", - "commonvoice_hours": 1.7, - "commonvoice_locale": "ko", - "in_benchmark": true, + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.20332074778330964, - "model": 1.0 + "score": 0.12351824822447692, + "sentence_nr": 1 }, { - "bcp_47": "ko", - "speakers": 78357046, - "language_name": "Korean", - "autonym": "한국어", - "family": "Koreanic", - "flores_path": "kor_Hang", - "fleurs_tag": "ko_kr", - "commonvoice_hours": 1.7, - "commonvoice_locale": "ko", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4005255477730261, - "model": 1.0 - }, - { - "bcp_47": "koi", - "speakers": 63775, - "language_name": "Komi-Permyak", - "autonym": "Komi-Permyak", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kok", - "speakers": 4906533, - "language_name": "Konkani", - "autonym": "कोंकणी", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "knn", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kos", - "speakers": 7990, - "language_name": "Kosraean", - "autonym": "Kosraean", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kpe", - "speakers": 1186303, - "language_name": "Kpelle", - "autonym": "Kpɛlɛɛ", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "krc", - "speakers": 240927, - "language_name": "Karachay-Balkar", - "autonym": "Karachay-Balkar", - "family": "Turkic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "krc", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kri", - "speakers": 6293684, - "language_name": "Krio", - "autonym": "Krio", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "krj", - "speakers": 425806, - "language_name": "Kinaray-a", - "autonym": "Kinaray-A", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "krl", - "speakers": 116212, - "language_name": "Karelian", - "autonym": "Karelian", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kru", - "speakers": 2519571, - "language_name": "Kurukh", - "autonym": "Kurukh", - "family": "Dravidian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ks", - "speakers": 5598085, - "language_name": "Kashmiri", - "autonym": "کٲشُر", - "family": "Indo-European", - "flores_path": "kas_Arab", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ks", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ksb", - "speakers": 995398, - "language_name": "Shambala", - "autonym": "Kishambaa", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ksf", - "speakers": 88784, - "language_name": "Bafia", - "autonym": "Rikpa", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 20.0, - "commonvoice_locale": "ksf", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ksh", - "speakers": 240479, - "language_name": "Colognian", - "autonym": "Kölsch", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ku", - "speakers": 6866757, - "language_name": "Kurdish", - "autonym": "Kurdî (Kurmancî)", - "family": "Indo-European", - "flores_path": "kmr_Latn", - "fleurs_tag": null, - "commonvoice_hours": 69.0, - "commonvoice_locale": "kmr", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kum", - "speakers": 283444, - "language_name": "Kumyk", - "autonym": "Kumyk", - "family": "Turkic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kv", - "speakers": 255100, - "language_name": "Komi", - "autonym": "Komi", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "kpv", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kvr", - "speakers": 373836, - "language_name": "Kerinci", - "autonym": "Kerinci", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kvx", - "speakers": 373602, - "language_name": "Parkari Koli", - "autonym": "Parkari Koli", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kw", - "speakers": 1973, - "language_name": "Cornish", - "autonym": "Kernewek", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 8.1, - "commonvoice_locale": "kw", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kwk", - "speakers": 377, - "language_name": "Kwakʼwala", - "autonym": "KwakʼWala", - "family": "Wakashan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kxm", - "speakers": 1172616, - "language_name": "Northern Khmer", - "autonym": "Northern Khmer", - "family": "Austroasiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kxp", - "speakers": 256851, - "language_name": "Wadiyara Koli", - "autonym": "Wadiyara Koli", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "kxv", - "speakers": 38457, - "language_name": "Kuvi", - "autonym": "Kuvi", - "family": "Dravidian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ky", - "speakers": 3338267, - "language_name": "Kyrgyz", - "autonym": "Кыргызча", - "family": "Turkic", - "flores_path": "kir_Cyrl", - "fleurs_tag": "ky_kg", - "commonvoice_hours": 39.0, - "commonvoice_locale": "ky", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "la", - "speakers": 820, - "language_name": "Latin", - "autonym": "Lingua Latina", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lad", - "speakers": 112781, - "language_name": "Ladino", - "autonym": "Ladino", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lag", - "speakers": 509409, - "language_name": "Langi", - "autonym": "Kɨlaangi", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lah", - "speakers": 93433552, - "language_name": "Western Panjabi", - "autonym": "لہندا پنجابی", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "laj", - "speakers": 1643614, - "language_name": "Lango (Uganda)", - "autonym": "Lango (Uganda)", - "family": "Nilotic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lb", - "speakers": 421015, - "language_name": "Luxembourgish", - "autonym": "Lëtzebuergesch", - "family": "Indo-European", - "flores_path": "ltz_Latn", - "fleurs_tag": "lb_lu", - "commonvoice_hours": 0.0, - "commonvoice_locale": "lb", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lbe", - "speakers": 110543, - "language_name": "Lak", - "autonym": "Lak", - "family": "Nakh-Daghestanian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lbw", - "speakers": 347134, - "language_name": "Tolaki", - "autonym": "Tolaki", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lcp", - "speakers": 87751, - "language_name": "Western Lawa", - "autonym": "Western Lawa", - "family": "Austroasiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lep", - "speakers": 79743, - "language_name": "Lepcha", - "autonym": "Lepcha", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lez", - "speakers": 255100, - "language_name": "Lezghian", - "autonym": "Lezghian", - "family": "Nakh-Daghestanian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lg", - "speakers": 5622890, - "language_name": "Ganda", - "autonym": "Luganda", - "family": "Atlantic-Congo", - "flores_path": "lug_Latn", - "fleurs_tag": "lg_ug", - "commonvoice_hours": 437.0, - "commonvoice_locale": "lg", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "li", - "speakers": 950422, - "language_name": "Limburgish", - "autonym": "Limburgish", - "family": "Indo-European", - "flores_path": "lim_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lif", - "speakers": 368085, - "language_name": "Limbu", - "autonym": "Limbu", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lij", - "speakers": 536663, - "language_name": "Ligurian", - "autonym": "Ligure", - "family": "Indo-European", - "flores_path": "lij_Latn", - "fleurs_tag": null, - "commonvoice_hours": 5.1, - "commonvoice_locale": "lij", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lil", - "speakers": 528, - "language_name": "Lillooet", - "autonym": "Lillooet", - "family": "Salishan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lis", - "speakers": 627309, - "language_name": "Lisu", - "autonym": "Lisu", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ljp", - "speakers": 1842479, - "language_name": "Lampung Api", - "autonym": "Lampung Api", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lki", - "speakers": 645417, - "language_name": "Laki", - "autonym": "Laki", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lkt", - "speakers": 8316, - "language_name": "Lakota", - "autonym": "LakȟólʼIyapi", - "family": "Siouan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lmn", - "speakers": 3580443, - "language_name": "Lambadi", - "autonym": "Lambadi", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lmo", - "speakers": 3901518, - "language_name": "Lombard", - "autonym": "Lombard", - "family": "Indo-European", - "flores_path": "lmo_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ln", - "speakers": 3514491, - "language_name": "Lingala", - "autonym": "Lingála", - "family": "Atlantic-Congo", - "flores_path": "lin_Latn", - "fleurs_tag": "ln_cd", - "commonvoice_hours": 0.0, - "commonvoice_locale": "ln", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lo", - "speakers": 5138706, - "language_name": "Lao", - "autonym": "ລາວ", - "family": "Tai-Kadai", - "flores_path": "lao_Laoo", - "fleurs_tag": "lo_la", - "commonvoice_hours": 0.2, - "commonvoice_locale": "lo", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lol", - "speakers": 620858, - "language_name": "Mongo", - "autonym": "Mongo", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "loz", - "speakers": 1045596, - "language_name": "Lozi", - "autonym": "Lozi", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lrc", - "speakers": 2020512, - "language_name": "Northern Luri", - "autonym": "لۊری شومالی", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lt", - "speakers": 2488617, - "language_name": "Lithuanian", - "autonym": "Lietuvių", - "family": "Indo-European", - "flores_path": "lit_Latn", - "fleurs_tag": "lt_lt", - "commonvoice_hours": 25.0, - "commonvoice_locale": "lt", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ltg", - "speakers": 167429, - "language_name": "Latgalian", - "autonym": "Latgalian", - "family": "Indo-European", - "flores_path": "ltg_Latn", - "fleurs_tag": null, - "commonvoice_hours": 29.0, - "commonvoice_locale": "ltg", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lu", - "speakers": 2340940, - "language_name": "Luba-Katanga", - "autonym": "Tshiluba", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lua", - "speakers": 9770880, - "language_name": "Luba-Lulua", - "autonym": "Luba-Lulua", - "family": "Atlantic-Congo", - "flores_path": "lua_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "luo", - "speakers": 5245734, - "language_name": "Luo (Kenya and Tanzania)", - "autonym": "Dholuo", - "family": "Nilotic", - "flores_path": "luo_Latn", - "fleurs_tag": "luo_ke", - "commonvoice_hours": 30.0, - "commonvoice_locale": "luo", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "luy", - "speakers": 5888069, - "language_name": "Luyia", - "autonym": "Luluhia", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "luz", - "speakers": 1019080, - "language_name": "Southern Luri", - "autonym": "Southern Luri", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lv", - "speakers": 1147550, - "language_name": "Latvian", - "autonym": "Latviešu", - "family": "Indo-European", - "flores_path": "lvs_Latn", - "fleurs_tag": "lv_lv", - "commonvoice_hours": 260.0, - "commonvoice_locale": "lv", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lwl", - "speakers": 6898, - "language_name": "Eastern Lawa", - "autonym": "Eastern Lawa", - "family": "Austroasiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lzh", - "speakers": 0, - "language_name": "Literary Chinese", - "autonym": "Literary Chinese", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "lzz", - "speakers": 22965, - "language_name": "Laz", - "autonym": "Laz", - "family": "Kartvelian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "lzz", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mad", - "speakers": 16822638, - "language_name": "Madurese", - "autonym": "Madurese", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "maf", - "speakers": 205313, - "language_name": "Mafa", - "autonym": "Mafa", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mag", - "speakers": 15913080, - "language_name": "Magahi", - "autonym": "Magahi", - "family": "Indo-European", - "flores_path": "mag_Deva", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mai", - "speakers": 19249149, - "language_name": "Maithili", - "autonym": "मैथिली", - "family": "Indo-European", - "flores_path": "mai_Deva", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "mai", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mak", - "speakers": 1949290, - "language_name": "Makasar", - "autonym": "Makasar", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "man", - "speakers": 3511762, - "language_name": "Mandingo", - "autonym": "Mandingo", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mas", - "speakers": 1734738, - "language_name": "Masai", - "autonym": "Maa", - "family": "Nilotic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "maz", - "speakers": 437410, - "language_name": "Central Mazahua", - "autonym": "Central Mazahua", - "family": "Otomanguean", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mdf", - "speakers": 297616, - "language_name": "Moksha", - "autonym": "Мокшень Кяль", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.5, - "commonvoice_locale": "mdf", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mdh", - "speakers": 1310172, - "language_name": "Maguindanaon", - "autonym": "Maguindanaon", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mdr", - "speakers": 245664, - "language_name": "Mandar", - "autonym": "Mandar", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "men", - "speakers": 1813083, - "language_name": "Mende", - "autonym": "Mende", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mer", - "speakers": 2141116, - "language_name": "Meru", - "autonym": "Kĩmĩrũ", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mey", - "speakers": 7239, - "language_name": "Hassaniyya", - "autonym": "Hassaniyya", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mfa", - "speakers": 3448870, - "language_name": "Pattani Malay", - "autonym": "Pattani Malay", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mfe", - "speakers": 1241433, - "language_name": "Morisyen", - "autonym": "Kreol Morisien", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mfv", - "speakers": 121170, - "language_name": "Mandjak", - "autonym": "Mandjak", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mg", - "speakers": 24260130, - "language_name": "Malagasy", - "autonym": "Malagasy", - "family": "Austronesian", - "flores_path": "plt_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "mg", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mgh", - "speakers": 1354419, - "language_name": "Makhuwa-Meetto", - "autonym": "Makua", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mgo", - "speakers": 130401, - "language_name": "Metaʼ", - "autonym": "Metaʼ", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mgp", - "speakers": 333607, - "language_name": "Eastern Magar", - "autonym": "Eastern Magar", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mgy", - "speakers": 819739, - "language_name": "Mbunga", - "autonym": "Mbunga", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mh", - "speakers": 56879, - "language_name": "Marshallese", - "autonym": "Marshallese", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mi", - "speakers": 137913, - "language_name": "Māori", - "autonym": "Māori", - "family": "Austronesian", - "flores_path": "mri_Latn", - "fleurs_tag": "mi_nz", - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mic", - "speakers": 7916, - "language_name": "Mi'kmaw", - "autonym": "LʼNuiʼSuti", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "min", - "speakers": 8010780, - "language_name": "Minangkabau", - "autonym": "Minangkabau", - "family": "Austronesian", - "flores_path": "min_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mk", - "speakers": 1608565, - "language_name": "Macedonian", - "autonym": "Македонски", - "family": "Indo-European", - "flores_path": "mkd_Cyrl", - "fleurs_tag": "mk_mk", - "commonvoice_hours": 19.0, - "commonvoice_locale": "mk", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ml", - "speakers": 43257484, - "language_name": "Malayalam", - "autonym": "മലയാളം", - "family": "Dravidian", - "flores_path": "mal_Mlym", - "fleurs_tag": "ml_in", - "commonvoice_hours": 2.8, - "commonvoice_locale": "ml", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mls", - "speakers": 451060, - "language_name": "Masalit", - "autonym": "Masalit", - "family": "Maban", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mn", - "speakers": 6572846, - "language_name": "Mongolian", - "autonym": "Монгол", - "family": "Mongolic-Khitan", - "flores_path": "khk_Cyrl", - "fleurs_tag": "mn_mn", - "commonvoice_hours": 46.0, - "commonvoice_locale": "mn", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mni", - "speakers": 1476591, - "language_name": "Manipuri", - "autonym": "মৈতৈলোন্", - "family": "Sino-Tibetan", - "flores_path": "mni_Beng", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "mni", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mnw", - "speakers": 966114, - "language_name": "Mon", - "autonym": "Mon", - "family": "Austroasiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "moe", - "speakers": 12062, - "language_name": "Innu-aimun", - "autonym": "Innu-Aimun", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "moh", - "speakers": 1772, - "language_name": "Mohawk", - "autonym": "KanienʼKéha", - "family": "Iroquoian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mos", - "speakers": 8334160, - "language_name": "Mossi", - "autonym": "Mossi", - "family": "Atlantic-Congo", - "flores_path": "mos_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "mos", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.46822754470803873, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "mr", - "speakers": 92826300, - "language_name": "Marathi", - "autonym": "मराठी", - "family": "Indo-European", - "flores_path": "mar_Deva", - "fleurs_tag": "mr_in", - "commonvoice_hours": 20.0, - "commonvoice_locale": "mr", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "model": 1.0 + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "bcp_47": "mr", - "speakers": 92826300, - "language_name": "Marathi", - "autonym": "मराठी", - "family": "Indo-European", - "flores_path": "mar_Deva", - "fleurs_tag": "mr_in", - "commonvoice_hours": 20.0, - "commonvoice_locale": "mr", - "in_benchmark": true, - "task": "language_modeling", + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9440892312053646, - "model": 1.0 - }, + "score": 0.3465147345201782, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "mr", - "speakers": 92826300, - "language_name": "Marathi", - "autonym": "मराठी", - "family": "Indo-European", - "flores_path": "mar_Deva", - "fleurs_tag": "mr_in", - "commonvoice_hours": 20.0, - "commonvoice_locale": "mr", - "in_benchmark": true, + "model": "mistralai/mistral-nemo", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.24903049799655144, - "model": 1.0 + "score": 0.08516700886866406, + "sentence_nr": 1 }, { - "bcp_47": "mr", - "speakers": 92826300, - "language_name": "Marathi", - "autonym": "मराठी", - "family": "Indo-European", - "flores_path": "mar_Deva", - "fleurs_tag": "mr_in", - "commonvoice_hours": 20.0, - "commonvoice_locale": "mr", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.42489125861884175, - "model": 1.0 - }, - { - "bcp_47": "mrd", - "speakers": 251722, - "language_name": "Western Magar", - "autonym": "Western Magar", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mrj", - "speakers": 29762, - "language_name": "Western Mari", - "autonym": "Western Mari", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 34.0, - "commonvoice_locale": "mrj", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mro", - "speakers": 29277, - "language_name": "Mru", - "autonym": "Mru", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ms", - "speakers": 38097307, - "language_name": "Malay", - "autonym": "Bahasa Malaysia", - "family": "Austronesian", - "flores_path": "zsm_Latn", - "fleurs_tag": "ms_my", - "commonvoice_hours": 0.0, - "commonvoice_locale": "ms", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mt", - "speakers": 457267, - "language_name": "Maltese", - "autonym": "Malti", - "family": "Afro-Asiatic", - "flores_path": "mlt_Latn", - "fleurs_tag": "mt_mt", - "commonvoice_hours": 8.7, - "commonvoice_locale": "mt", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mtr", - "speakers": 1286307, - "language_name": "Mewari", - "autonym": "Mewari", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mua", - "speakers": 277450, - "language_name": "Mundang", - "autonym": "Mundaŋ", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mus", - "speakers": 3992, - "language_name": "Muscogee", - "autonym": "Mvskoke", - "family": "Muskogean", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mvy", - "speakers": 326901, - "language_name": "Indus Kohistani", - "autonym": "Indus Kohistani", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 23.0, - "commonvoice_locale": "mvy", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mwk", - "speakers": 977670, - "language_name": "Kita Maninkakan", - "autonym": "Kita Maninkakan", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mwr", - "speakers": 15913080, - "language_name": "Marwari", - "autonym": "Marwari", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mwv", - "speakers": 64086, - "language_name": "Mentawai", - "autonym": "Mentawai", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mxc", - "speakers": 945510, - "language_name": "Manyika", - "autonym": "Manyika", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "my", - "speakers": 36559231, - "language_name": "Burmese", - "autonym": "မြန်မာ", - "family": "Sino-Tibetan", - "flores_path": "mya_Mymr", - "fleurs_tag": "my_mm", - "commonvoice_hours": 0.0, - "commonvoice_locale": "my", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "myv", - "speakers": 439338, - "language_name": "Erzya", - "autonym": "Эрзянь Кель", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 3.8, - "commonvoice_locale": "myv", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "myx", - "speakers": 1254337, - "language_name": "Masaaba", - "autonym": "Masaaba", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "mzn", - "speakers": 4246165, - "language_name": "Mazanderani", - "autonym": "مازرونی", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "na", - "speakers": 6930, - "language_name": "Nauru", - "autonym": "Nauru", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nan", - "speakers": 26486380, - "language_name": "Min Nan Chinese", - "autonym": "Min Nan Chinese", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nap", - "speakers": 605306, - "language_name": "Neapolitan", - "autonym": "Neapolitan", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "naq", - "speakers": 289308, - "language_name": "Nama", - "autonym": "Khoekhoegowab", - "family": "Khoe-Kwadi", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nb", - "speakers": 5468932, - "language_name": "Norwegian Bokmål", - "autonym": "Norsk Bokmål", - "family": "Indo-European", - "flores_path": "nob_Latn", - "fleurs_tag": "nb_no", - "commonvoice_hours": 0.1, - "commonvoice_locale": "nb-NO", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nch", - "speakers": 244435, - "language_name": "Central Huasteca Nahuatl", - "autonym": "Central Huasteca Nahuatl", - "family": "Uto-Aztecan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nd", - "speakers": 1745556, - "language_name": "North Ndebele", - "autonym": "Isindebele", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "nd", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ndc", - "speakers": 3867046, - "language_name": "Ndau", - "autonym": "Ndau", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nds", - "speakers": 11520008, - "language_name": "Low German", - "autonym": "Neddersass’Sch", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ne", - "speakers": 20903374, - "language_name": "Nepali", - "autonym": "नेपाली", - "family": "Indo-European", - "flores_path": "npi_Deva", - "fleurs_tag": "ne_np", - "commonvoice_hours": 1.3, - "commonvoice_locale": "ne-NP", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "new", - "speakers": 1000821, - "language_name": "Newari", - "autonym": "Newari", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "new", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ng", - "speakers": 552315, - "language_name": "Ndonga", - "autonym": "Ndonga", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ngl", - "speakers": 2046678, - "language_name": "Lomwe", - "autonym": "Lomwe", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nhe", - "speakers": 501735, - "language_name": "Eastern Huasteca Nahuatl", - "autonym": "Eastern Huasteca Nahuatl", - "family": "Uto-Aztecan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "nhe", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nhw", - "speakers": 501735, - "language_name": "Western Huasteca Nahuatl", - "autonym": "Western Huasteca Nahuatl", - "family": "Uto-Aztecan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nij", - "speakers": 987996, - "language_name": "Ngaju", - "autonym": "Ngaju", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "niu", - "speakers": 1120, - "language_name": "Niuean", - "autonym": "Niuean", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "njo", - "speakers": 305001, - "language_name": "Ao Naga", - "autonym": "Ao Naga", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nl", - "speakers": 31765645, - "language_name": "Dutch", - "autonym": "Nederlands", - "family": "Indo-European", - "flores_path": "nld_Latn", - "fleurs_tag": "nl_nl", - "commonvoice_hours": 114.0, - "commonvoice_locale": "nl", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nmg", - "speakers": 8878, - "language_name": "Kwasio", - "autonym": "Kwasio", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nn", - "speakers": 1366860, - "language_name": "Norwegian Nynorsk", - "autonym": "Norsk Nynorsk", - "family": "Indo-European", - "flores_path": "nno_Latn", - "fleurs_tag": null, - "commonvoice_hours": 1.5, - "commonvoice_locale": "nn-NO", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nnh", - "speakers": 388430, - "language_name": "Ngiemboon", - "autonym": "Shwóŋò Ngiembɔɔn", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 23.0, - "commonvoice_locale": "nnh", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "no", - "speakers": 5467440, - "language_name": "Norwegian", - "autonym": "Norsk", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nod", - "speakers": 6621830, - "language_name": "Northern Thai", - "autonym": "Northern Thai", - "family": "Tai-Kadai", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "noe", - "speakers": 1723917, - "language_name": "Nimadi", - "autonym": "Nimadi", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nqo", - "speakers": 626370, - "language_name": "N’Ko", - "autonym": "ߒߞߏ", - "family": "Artificial Language", - "flores_path": "nqo_Nkoo", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "nqo", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nr", - "speakers": 903418, - "language_name": "South Ndebele", - "autonym": "South Ndebele", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "nr", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nsk", - "speakers": 1395, - "language_name": "Naskapi", - "autonym": "Naskapi", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nso", - "speakers": 5307578, - "language_name": "Northern Sotho", - "autonym": "Northern Sotho", - "family": "Atlantic-Congo", - "flores_path": "nso_Latn", - "fleurs_tag": "nso_za", - "commonvoice_hours": 0.0, - "commonvoice_locale": "nso", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nus", - "speakers": 591427, - "language_name": "Nuer", - "autonym": "Thok Nath", - "family": "Nilotic", - "flores_path": "nus_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nv", - "speakers": 166320, - "language_name": "Navajo", - "autonym": "Diné Bizaad", - "family": "Athabaskan-Eyak-Tlingit", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nxq", - "speakers": 334565, - "language_name": "Naxi", - "autonym": "Naxi", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ny", - "speakers": 17026781, - "language_name": "Nyanja", - "autonym": "Nyanja", - "family": "Atlantic-Congo", - "flores_path": "nya_Latn", - "fleurs_tag": "ny_mw", - "commonvoice_hours": 0.0, - "commonvoice_locale": "ny", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nym", - "speakers": 1932242, - "language_name": "Nyamwezi", - "autonym": "Nyamwezi", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nyn", - "speakers": 2724939, - "language_name": "Nyankole", - "autonym": "Runyankore", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "nyn", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "nzi", - "speakers": 293402, - "language_name": "Nzima", - "autonym": "Nzima", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "oc", - "speakers": 2040398, - "language_name": "Occitan", - "autonym": "Occitan", - "family": "Indo-European", - "flores_path": "oci_Latn", - "fleurs_tag": "oc_fr", - "commonvoice_hours": 1.8, - "commonvoice_locale": "oc", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "oj", - "speakers": 23747, - "language_name": "Ojibwa", - "autonym": "Ojibwa", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ojs", - "speakers": 15078, - "language_name": "Oji-Cree", - "autonym": "Oji-Cree", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "oka", - "speakers": 490, - "language_name": "Okanagan", - "autonym": "Okanagan", - "family": "Salishan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "om", - "speakers": 34897121, - "language_name": "Oromo", - "autonym": "Oromoo", - "family": "Afro-Asiatic", - "flores_path": "gaz_Latn", - "fleurs_tag": "om_et", - "commonvoice_hours": 0.0, - "commonvoice_locale": "om", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "or", - "speakers": 42434880, - "language_name": "Odia", - "autonym": "ଓଡ଼ିଆ", - "family": "Indo-European", - "flores_path": "ory_Orya", - "fleurs_tag": "or_in", - "commonvoice_hours": 2.8, - "commonvoice_locale": "or", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "os", - "speakers": 541444, - "language_name": "Ossetic", - "autonym": "Ирон", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.8, - "commonvoice_locale": "os", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "osa", - "speakers": 0, - "language_name": "Osage", - "autonym": "𐓏𐓘𐓻𐓘𐓻𐓟", - "family": "Siouan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4091252890943268, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "pa", - "speakers": 203571210, - "language_name": "Punjabi", - "autonym": "ਪੰਜਾਬੀ", - "family": "Indo-European", - "flores_path": "pan_Guru", - "fleurs_tag": "pa_in", - "commonvoice_hours": 2.3, - "commonvoice_locale": "pa-IN", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "model": 1.0 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.19194937906573872, + "sentence_nr": 1 }, { - "bcp_47": "pa", - "speakers": 203571210, - "language_name": "Punjabi", - "autonym": "ਪੰਜਾਬੀ", - "family": "Indo-European", - "flores_path": "pan_Guru", - "fleurs_tag": "pa_in", - "commonvoice_hours": 2.3, - "commonvoice_locale": "pa-IN", - "in_benchmark": true, - "task": "language_modeling", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9025298113664532, - "model": 1.0 - }, + "score": 0.5477665664300843, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "pa", - "speakers": 203571210, - "language_name": "Punjabi", - "autonym": "ਪੰਜਾਬੀ", - "family": "Indo-European", - "flores_path": "pan_Guru", - "fleurs_tag": "pa_in", - "commonvoice_hours": 2.3, - "commonvoice_locale": "pa-IN", - "in_benchmark": true, + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.3368333727390049, - "model": 1.0 + "score": 0.0, + "sentence_nr": 1 }, { - "bcp_47": "pa", - "speakers": 203571210, - "language_name": "Punjabi", - "autonym": "ਪੰਜਾਬੀ", - "family": "Indo-European", - "flores_path": "pan_Guru", - "fleurs_tag": "pa_in", - "commonvoice_hours": 2.3, - "commonvoice_locale": "pa-IN", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4873541452250132, - "model": 1.0 - }, - { - "bcp_47": "pag", - "speakers": 1528534, - "language_name": "Pangasinan", - "autonym": "Pangasinan", - "family": "Austronesian", - "flores_path": "pag_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pam", - "speakers": 2511163, - "language_name": "Pampanga", - "autonym": "Pampanga", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pap", - "speakers": 211640, - "language_name": "Papiamento", - "autonym": "Papiamentu", - "family": "Indo-European", - "flores_path": "pap_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "pap-AW", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pau", - "speakers": 16047, - "language_name": "Palauan", - "autonym": "Palauan", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pcd", - "speakers": 746330, - "language_name": "Picard", - "autonym": "Picard", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pcm", - "speakers": 44945880, - "language_name": "Nigerian Pidgin", - "autonym": "Naijíriá Píjin", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 14.0, - "commonvoice_locale": "pcm", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pdc", - "speakers": 129729, - "language_name": "Pennsylvania German", - "autonym": "Pennsylvania German", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pdt", - "speakers": 90466, - "language_name": "Plautdietsch", - "autonym": "Plautdietsch", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pfl", - "speakers": 0, - "language_name": "Palatine German", - "autonym": "Palatine German", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pis", - "speakers": 561780, - "language_name": "Pijin", - "autonym": "Pijin", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pko", - "speakers": 369343, - "language_name": "Pökoot", - "autonym": "Pökoot", - "family": "Nilotic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pl", - "speakers": 41077399, - "language_name": "Polish", - "autonym": "Polski", - "family": "Indo-European", - "flores_path": "pol_Latn", - "fleurs_tag": "pl_pl", - "commonvoice_hours": 174.0, - "commonvoice_locale": "pl", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pms", - "speakers": 6178, - "language_name": "Piedmontese", - "autonym": "Piedmontese", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pnt", - "speakers": 392463, - "language_name": "Pontic", - "autonym": "Pontic", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pon", - "speakers": 23560, - "language_name": "Pohnpeian", - "autonym": "Pohnpeian", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "pqm", - "speakers": 490, - "language_name": "Maliseet-Passamaquoddy", - "autonym": "Maliseet-Passamaquoddy", - "family": "Algic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "prd", - "speakers": 864342, - "language_name": "Parsi-Dari", - "autonym": "Parsi-Dari", - "family": "Bookkeeping", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "prg", - "speakers": 38, - "language_name": "Prussian", - "autonym": "Prūsiskan", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ps", - "speakers": 53542641, - "language_name": "Pashto", - "autonym": "پښتو", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": "ps_af", - "commonvoice_hours": 80.0, - "commonvoice_locale": "ps", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4370196290761142, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "pt", - "speakers": 237496885, - "language_name": "Portuguese", - "autonym": "Português", - "family": "Indo-European", - "flores_path": "por_Latn", - "fleurs_tag": "pt_br", - "commonvoice_hours": 177.0, - "commonvoice_locale": "pt", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "model": 1.0 + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20669086265781264, + "sentence_nr": 1 }, { - "bcp_47": "pt", - "speakers": 237496885, - "language_name": "Portuguese", - "autonym": "Português", - "family": "Indo-European", - "flores_path": "por_Latn", - "fleurs_tag": "pt_br", - "commonvoice_hours": 177.0, - "commonvoice_locale": "pt", - "in_benchmark": true, - "task": "language_modeling", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9641423549595803, - "model": 1.0 - }, + "score": 0.5076721272198604, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "pt", - "speakers": 237496885, - "language_name": "Portuguese", - "autonym": "Português", - "family": "Indo-European", - "flores_path": "por_Latn", - "fleurs_tag": "pt_br", - "commonvoice_hours": 177.0, - "commonvoice_locale": "pt", - "in_benchmark": true, + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.3110105331834714, - "model": 1.0 + "score": 0.17630490037560695, + "sentence_nr": 1 }, { - "bcp_47": "pt", - "speakers": 237496885, - "language_name": "Portuguese", - "autonym": "Português", - "family": "Indo-European", - "flores_path": "por_Latn", - "fleurs_tag": "pt_br", - "commonvoice_hours": 177.0, - "commonvoice_locale": "pt", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.49172080600981716, - "model": 1.0 - }, - { - "bcp_47": "puu", - "speakers": 200782, - "language_name": "Punu", - "autonym": "Punu", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "qu", - "speakers": 11385851, - "language_name": "Quechua", - "autonym": "Runasimi", - "family": "Quechuan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "quc", - "speakers": 1200731, - "language_name": "Kʼicheʼ", - "autonym": "KʼIcheʼ", - "family": "Mayan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "quc", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "qug", - "speakers": 963579, - "language_name": "Chimborazo Highland Quichua", - "autonym": "Chimborazo Highland Quichua", - "family": "Quechuan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "raj", - "speakers": 1326090, - "language_name": "Rajasthani", - "autonym": "राजस्थानी", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rcf", - "speakers": 559185, - "language_name": "Réunion Creole French", - "autonym": "Réunion Creole French", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rej", - "speakers": 1228320, - "language_name": "Rejang", - "autonym": "Rejang", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rgn", - "speakers": 0, - "language_name": "Romagnol", - "autonym": "Romagnol", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rhg", - "speakers": 1824082, - "language_name": "Rohingya", - "autonym": "𐴌𐴗𐴥𐴝𐴙𐴚𐴒𐴙𐴝", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ria", - "speakers": 172392, - "language_name": "Riang (India)", - "autonym": "Riang (India)", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rif", - "speakers": 3692411, - "language_name": "Riffian", - "autonym": "Tarifit", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "rif", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rjs", - "speakers": 133443, - "language_name": "Rajbanshi", - "autonym": "Rajbanshi", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rkt", - "speakers": 16274502, - "language_name": "Rangpuri", - "autonym": "Rangpuri", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rm", - "speakers": 42020, - "language_name": "Romansh", - "autonym": "Rumantsch", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rmf", - "speakers": 5015, - "language_name": "Kalo Finnish Romani", - "autonym": "Kalo Finnish Romani", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rmo", - "speakers": 24372, - "language_name": "Sinte Romani", - "autonym": "Sinte Romani", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rmt", - "speakers": 1613543, - "language_name": "Domari", - "autonym": "Domari", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rmu", - "speakers": 9488, - "language_name": "Tavringer Romani", - "autonym": "Tavringer Romani", - "family": "Speech Register", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rn", - "speakers": 7475454, - "language_name": "Rundi", - "autonym": "Ikirundi", - "family": "Atlantic-Congo", - "flores_path": "run_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "rn", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rng", - "speakers": 1023339, - "language_name": "Ronga", - "autonym": "Ronga", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ro", - "speakers": 22187408, - "language_name": "Romanian", - "autonym": "Română", - "family": "Indo-European", - "flores_path": "ron_Latn", - "fleurs_tag": "ro_ro", - "commonvoice_hours": 21.0, - "commonvoice_locale": "ro", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rob", - "speakers": 293729, - "language_name": "Tae'", - "autonym": "Tae'", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rof", - "speakers": 433291, - "language_name": "Rombo", - "autonym": "Kihorombo", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rtm", - "speakers": 2527, - "language_name": "Rotuman", - "autonym": "Rotuman", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.48116430160978857, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "ru", - "speakers": 195841151, - "language_name": "Russian", - "autonym": "Русский", - "family": "Indo-European", - "flores_path": "rus_Cyrl", - "fleurs_tag": "ru_ru", - "commonvoice_hours": 243.0, - "commonvoice_locale": "ru", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "model": 1.0 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "bcp_47": "ru", - "speakers": 195841151, - "language_name": "Russian", - "autonym": "Русский", - "family": "Indo-European", - "flores_path": "rus_Cyrl", - "fleurs_tag": "ru_ru", - "commonvoice_hours": 243.0, - "commonvoice_locale": "ru", - "in_benchmark": true, - "task": "language_modeling", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9663696817874857, - "model": 1.0 - }, + "score": 0.4122750002638689, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "ru", - "speakers": 195841151, - "language_name": "Russian", - "autonym": "Русский", - "family": "Indo-European", - "flores_path": "rus_Cyrl", - "fleurs_tag": "ru_ru", - "commonvoice_hours": 243.0, - "commonvoice_locale": "ru", - "in_benchmark": true, + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.26015179309825326, - "model": 1.0 + "score": 0.15412719160788987, + "sentence_nr": 1 }, { - "bcp_47": "ru", - "speakers": 195841151, - "language_name": "Russian", - "autonym": "Русский", - "family": "Indo-European", - "flores_path": "rus_Cyrl", - "fleurs_tag": "ru_ru", - "commonvoice_hours": 243.0, - "commonvoice_locale": "ru", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4525762744858351, - "model": 1.0 - }, - { - "bcp_47": "rue", - "speakers": 527075, - "language_name": "Rusyn", - "autonym": "Rusyn", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rug", - "speakers": 9591, - "language_name": "Roviana", - "autonym": "Roviana", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rw", - "speakers": 11083625, - "language_name": "Kinyarwanda", - "autonym": "Kinyarwanda", - "family": "Atlantic-Congo", - "flores_path": "kin_Latn", - "fleurs_tag": null, - "commonvoice_hours": 2002.0, - "commonvoice_locale": "rw", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "rwk", - "speakers": 128816, - "language_name": "Rwa", - "autonym": "Kiruwa", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ryu", - "speakers": 966404, - "language_name": "Central Okinawan", - "autonym": "Central Okinawan", - "family": "Japonic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sa", - "speakers": 15913, - "language_name": "Sanskrit", - "autonym": "संस्कृत भाषा", - "family": "Indo-European", - "flores_path": "san_Deva", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "saf", - "speakers": 4108, - "language_name": "Safaliba", - "autonym": "Safaliba", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sah", - "speakers": 453510, - "language_name": "Yakut", - "autonym": "Саха Тыла", - "family": "Turkic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 11.0, - "commonvoice_locale": "sah", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "saq", - "speakers": 246228, - "language_name": "Samburu", - "autonym": "Kisampur", - "family": "Nilotic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sas", - "speakers": 2590152, - "language_name": "Sasak", - "autonym": "Sasak", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sat", - "speakers": 7293495, - "language_name": "Santali", - "autonym": "ᱥᱟᱱᱛᱟᱲᱤ", - "family": "Austroasiatic", - "flores_path": "sat_Olck", - "fleurs_tag": null, - "commonvoice_hours": 0.5, - "commonvoice_locale": "sat", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sav", - "speakers": 236046, - "language_name": "Saafi-Saafi", - "autonym": "Saafi-Saafi", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "saz", - "speakers": 384566, - "language_name": "Saurashtra", - "autonym": "Saurashtra", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sbp", - "speakers": 117106, - "language_name": "Sangu", - "autonym": "Ishisangu", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sc", - "speakers": 1060846, - "language_name": "Sardinian", - "autonym": "Sardu", - "family": "Indo-European", - "flores_path": "srd_Latn", - "fleurs_tag": null, - "commonvoice_hours": 2.9, - "commonvoice_locale": "sc", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sck", - "speakers": 2386962, - "language_name": "Sadri", - "autonym": "Sadri", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "scn", - "speakers": 511702, - "language_name": "Sicilian", - "autonym": "Sicilianu", - "family": "Indo-European", - "flores_path": "scn_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "scn", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sco", - "speakers": 1644028, - "language_name": "Scots", - "autonym": "Scots", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "sco", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sd", - "speakers": 40329510, - "language_name": "Sindhi", - "autonym": "سنڌي", - "family": "Indo-European", - "flores_path": "snd_Arab", - "fleurs_tag": "sd_in", - "commonvoice_hours": 0.4, - "commonvoice_locale": "sd", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sdc", - "speakers": 106085, - "language_name": "Sassarese Sardinian", - "autonym": "Sassarese Sardinian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sdh", - "speakers": 3142162, - "language_name": "Southern Kurdish", - "autonym": "کوردی خوارگ", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "sdh", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "se", - "speakers": 51530, - "language_name": "Northern Sami", - "autonym": "Davvisámegiella", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sef", - "speakers": 1181687, - "language_name": "Cebaara Senoufo", - "autonym": "Cebaara Senoufo", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "seh", - "speakers": 1384517, - "language_name": "Sena", - "autonym": "Sena", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sei", - "speakers": 901, - "language_name": "Seri", - "autonym": "Seri", - "family": null, - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 2.5, - "commonvoice_locale": "sei", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ses", - "speakers": 664816, - "language_name": "Koyraboro Senni", - "autonym": "Koyraboro Senni", - "family": "Songhay", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sg", - "speakers": 2935521, - "language_name": "Sango", - "autonym": "Sängö", - "family": "Atlantic-Congo", - "flores_path": "sag_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sgs", - "speakers": 0, - "language_name": "Samogitian", - "autonym": "Samogitian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "shi", - "speakers": 6187736, - "language_name": "Tachelhit", - "autonym": "ⵜⴰⵛⵍⵃⵉⵜ", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "shi", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "shn", - "speakers": 3687984, - "language_name": "Shan", - "autonym": "တႆး", - "family": "Tai-Kadai", - "flores_path": "shn_Mymr", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "shn", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "si", - "speakers": 15564656, - "language_name": "Sinhala", - "autonym": "සිංහල", - "family": "Indo-European", - "flores_path": "sin_Sinh", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "si", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sid", - "speakers": 3783955, - "language_name": "Sidamo", - "autonym": "Sidamo", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sk", - "speakers": 6680269, - "language_name": "Slovak", - "autonym": "Slovenčina", - "family": "Indo-European", - "flores_path": "slk_Latn", - "fleurs_tag": "sk_sk", - "commonvoice_hours": 47.0, - "commonvoice_locale": "sk", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "skr", - "speakers": 28020120, - "language_name": "Saraiki", - "autonym": "سرائیکی", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 4.3, - "commonvoice_locale": "skr", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sl", - "speakers": 1973181, - "language_name": "Slovenian", - "autonym": "Slovenščina", - "family": "Indo-European", - "flores_path": "slv_Latn", - "fleurs_tag": "sl_si", - "commonvoice_hours": 17.0, - "commonvoice_locale": "sl", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sli", - "speakers": 11868, - "language_name": "Lower Silesian", - "autonym": "Lower Silesian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sly", - "speakers": 144194, - "language_name": "Selayar", - "autonym": "Selayar", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sm", - "speakers": 252717, - "language_name": "Samoan", - "autonym": "Samoan", - "family": "Austronesian", - "flores_path": "smo_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sma", - "speakers": 296, - "language_name": "Southern Sami", - "autonym": "Åarjelsaemien Gïele", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "smj", - "speakers": 1530, - "language_name": "Lule Sami", - "autonym": "Julevsámegiella", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "smn", - "speakers": 613, - "language_name": "Inari Sami", - "autonym": "Anarâškielâ", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sms", - "speakers": 613, - "language_name": "Skolt Sami", - "autonym": "SääʹMǩiõll", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sn", - "speakers": 11782503, - "language_name": "Shona", - "autonym": "Chishona", - "family": "Atlantic-Congo", - "flores_path": "sna_Latn", - "fleurs_tag": "sn_zw", - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "snf", - "speakers": 37767, - "language_name": "Noon", - "autonym": "Noon", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "snk", - "speakers": 1153651, - "language_name": "Soninke", - "autonym": "Soninke", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "snk", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "so", - "speakers": 16911645, - "language_name": "Somali", - "autonym": "Soomaali", - "family": "Afro-Asiatic", - "flores_path": "som_Latn", - "fleurs_tag": "so_so", - "commonvoice_hours": 0.0, - "commonvoice_locale": "so", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sou", - "speakers": 5518192, - "language_name": "Southern Thai", - "autonym": "Southern Thai", - "family": "Tai-Kadai", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sq", - "speakers": 6791906, - "language_name": "Albanian", - "autonym": "Shqip", - "family": "Indo-European", - "flores_path": "als_Latn", - "fleurs_tag": null, - "commonvoice_hours": 8.8, - "commonvoice_locale": "sq", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sr", - "speakers": 15602410, - "language_name": "Serbian", - "autonym": "Српски", - "family": "Indo-European", - "flores_path": "srp_Cyrl", - "fleurs_tag": "sr_rs", - "commonvoice_hours": 7.5, - "commonvoice_locale": "sr", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "srn", - "speakers": 414507, - "language_name": "Sranan Tongo", - "autonym": "Sranan Tongo", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "srr", - "speakers": 1731004, - "language_name": "Serer", - "autonym": "Serer", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "srx", - "speakers": 464132, - "language_name": "Sirmauri", - "autonym": "Sirmauri", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ss", - "speakers": 2212379, - "language_name": "Swati", - "autonym": "Siswati", - "family": "Atlantic-Congo", - "flores_path": "ssw_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ss", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ssy", - "speakers": 218923, - "language_name": "Saho", - "autonym": "Saho", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "st", - "speakers": 6390567, - "language_name": "Southern Sotho", - "autonym": "Sesotho", - "family": "Atlantic-Congo", - "flores_path": "sot_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "st", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "stq", - "speakers": 962, - "language_name": "Saterland Frisian", - "autonym": "Saterland Frisian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "su", - "speakers": 32043120, - "language_name": "Sundanese", - "autonym": "Basa Sunda", - "family": "Austronesian", - "flores_path": "sun_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "suk", - "speakers": 5094094, - "language_name": "Sukuma", - "autonym": "Sukuma", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sus", - "speakers": 1378014, - "language_name": "Susu", - "autonym": "Susu", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sv", - "speakers": 12932871, - "language_name": "Swedish", - "autonym": "Svenska", - "family": "Indo-European", - "flores_path": "swe_Latn", - "fleurs_tag": "sv_se", - "commonvoice_hours": 47.0, - "commonvoice_locale": "sv-SE", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sw", - "speakers": 171610296, - "language_name": "Swahili", - "autonym": "Kiswahili", - "family": "Atlantic-Congo", - "flores_path": "swh_Latn", - "fleurs_tag": "sw_ke", - "commonvoice_hours": 411.0, - "commonvoice_locale": "sw", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "model": 1.0 - }, - { - "bcp_47": "sw", - "speakers": 171610296, - "language_name": "Swahili", - "autonym": "Kiswahili", - "family": "Atlantic-Congo", - "flores_path": "swh_Latn", - "fleurs_tag": "sw_ke", - "commonvoice_hours": 411.0, - "commonvoice_locale": "sw", - "in_benchmark": true, - "task": "language_modeling", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9401745263817055, - "model": 1.0 - }, + "score": 0.5010353699512481, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "sw", - "speakers": 171610296, - "language_name": "Swahili", - "autonym": "Kiswahili", - "family": "Atlantic-Congo", - "flores_path": "swh_Latn", - "fleurs_tag": "sw_ke", - "commonvoice_hours": 411.0, - "commonvoice_locale": "sw", - "in_benchmark": true, + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.30022614000139736, - "model": 1.0 - }, - { - "bcp_47": "sw", - "speakers": 171610296, - "language_name": "Swahili", - "autonym": "Kiswahili", - "family": "Atlantic-Congo", - "flores_path": "swh_Latn", - "fleurs_tag": "sw_ke", - "commonvoice_hours": 411.0, - "commonvoice_locale": "sw", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.46620999112028233, - "model": 1.0 - }, - { - "bcp_47": "swb", - "speakers": 170720, - "language_name": "Comorian", - "autonym": "Comorian", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "swg", - "speakers": 801597, - "language_name": "Swabian", - "autonym": "Swabian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "swv", - "speakers": 3713052, - "language_name": "Shekhawati", - "autonym": "Shekhawati", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "sxn", - "speakers": 245664, - "language_name": "Sangir", - "autonym": "Sangir", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "syl", - "speakers": 8132550, - "language_name": "Sylheti", - "autonym": "Sylheti", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "syr", - "speakers": 210659, - "language_name": "Syriac", - "autonym": "ܣܘܪܝܝܐ", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "syr", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "szl", - "speakers": 497670, - "language_name": "Silesian", - "autonym": "Ślōnski", - "family": "Indo-European", - "flores_path": "szl_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ta", - "speakers": 85616159, - "language_name": "Tamil", - "autonym": "தமிழ்", - "family": "Dravidian", - "flores_path": "tam_Taml", - "fleurs_tag": "ta_in", - "commonvoice_hours": 234.0, - "commonvoice_locale": "ta", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "model": 1.0 + "score": 0.12369892692249995, + "sentence_nr": 1 }, { - "bcp_47": "ta", - "speakers": 85616159, - "language_name": "Tamil", - "autonym": "தமிழ்", - "family": "Dravidian", - "flores_path": "tam_Taml", - "fleurs_tag": "ta_in", - "commonvoice_hours": 234.0, - "commonvoice_locale": "ta", - "in_benchmark": true, - "task": "language_modeling", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9641464401452432, - "model": 1.0 - }, + "score": 0.44549610902403686, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "ta", - "speakers": 85616159, - "language_name": "Tamil", - "autonym": "தமிழ்", - "family": "Dravidian", - "flores_path": "tam_Taml", - "fleurs_tag": "ta_in", - "commonvoice_hours": 234.0, - "commonvoice_locale": "ta", - "in_benchmark": true, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.2528104486727614, - "model": 1.0 - }, - { - "bcp_47": "ta", - "speakers": 85616159, - "language_name": "Tamil", - "autonym": "தமிழ்", - "family": "Dravidian", - "flores_path": "tam_Taml", - "fleurs_tag": "ta_in", - "commonvoice_hours": 234.0, - "commonvoice_locale": "ta", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.42077045938882934, - "model": 1.0 - }, - { - "bcp_47": "taj", - "speakers": 130410, - "language_name": "Eastern Tamang", - "autonym": "Eastern Tamang", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tbw", - "speakers": 10045, - "language_name": "Tagbanwa", - "autonym": "Tagbanwa", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tcy", - "speakers": 1989135, - "language_name": "Tulu", - "autonym": "Tulu", - "family": "Dravidian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tdd", - "speakers": 264864, - "language_name": "Tai Nüa", - "autonym": "Tai Nüa", - "family": "Tai-Kadai", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tdg", - "speakers": 394263, - "language_name": "Western Tamang", - "autonym": "Western Tamang", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tdh", - "speakers": 36393, - "language_name": "Thulung", - "autonym": "Thulung", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "te", - "speakers": 95478480, - "language_name": "Telugu", - "autonym": "తెలుగు", - "family": "Dravidian", - "flores_path": "tel_Telu", - "fleurs_tag": "te_in", - "commonvoice_hours": 0.3, - "commonvoice_locale": "te", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.36666666666666664, - "model": 1.0 + "score": 0.12560672881768975, + "sentence_nr": 1 }, { - "bcp_47": "te", - "speakers": 95478480, - "language_name": "Telugu", - "autonym": "తెలుగు", - "family": "Dravidian", - "flores_path": "tel_Telu", - "fleurs_tag": "te_in", - "commonvoice_hours": 0.3, - "commonvoice_locale": "te", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9513838296654142, - "model": 1.0 - }, + "score": 0.4969560260291519, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "te", - "speakers": 95478480, - "language_name": "Telugu", - "autonym": "తెలుగు", - "family": "Dravidian", - "flores_path": "tel_Telu", - "fleurs_tag": "te_in", - "commonvoice_hours": 0.3, - "commonvoice_locale": "te", - "in_benchmark": true, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.28338449781147135, - "model": 1.0 - }, - { - "bcp_47": "te", - "speakers": 95478480, - "language_name": "Telugu", - "autonym": "తెలుగు", - "family": "Dravidian", - "flores_path": "tel_Telu", - "fleurs_tag": "te_in", - "commonvoice_hours": 0.3, - "commonvoice_locale": "te", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.467003606031036, - "model": 1.0 - }, - { - "bcp_47": "tem", - "speakers": 1722482, - "language_name": "Timne", - "autonym": "Timne", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "teo", - "speakers": 2082973, - "language_name": "Teso", - "autonym": "Kiteso", - "family": "Nilotic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tet", - "speakers": 816395, - "language_name": "Tetum", - "autonym": "Tetum", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tg", - "speakers": 9644223, - "language_name": "Tajik", - "autonym": "Тоҷикӣ", - "family": "Indo-European", - "flores_path": "tgk_Cyrl", - "fleurs_tag": "tg_tj", - "commonvoice_hours": 0.0, - "commonvoice_locale": "tg", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "th", - "speakers": 55181920, - "language_name": "Thai", - "autonym": "ไทย", - "family": "Tai-Kadai", - "flores_path": "tha_Thai", - "fleurs_tag": "th_th", - "commonvoice_hours": 172.0, - "commonvoice_locale": "th", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "thl", - "speakers": 606558, - "language_name": "Dangaura Tharu", - "autonym": "Dangaura Tharu", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "thq", - "speakers": 303279, - "language_name": "Kochila Tharu", - "autonym": "Kochila Tharu", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "thr", - "speakers": 363935, - "language_name": "Rana Tharu", - "autonym": "Rana Tharu", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ti", - "speakers": 10145911, - "language_name": "Tigrinya", - "autonym": "ትግርኛ", - "family": "Afro-Asiatic", - "flores_path": "tir_Ethi", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ti", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tig", - "speakers": 1094616, - "language_name": "Tigre", - "autonym": "Tigre", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 11.0, - "commonvoice_locale": "tig", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tiv", - "speakers": 3424448, - "language_name": "Tiv", - "autonym": "Tiv", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tk", - "speakers": 6870838, - "language_name": "Turkmen", - "autonym": "Türkmen Dili", - "family": "Turkic", - "flores_path": "tuk_Latn", - "fleurs_tag": null, - "commonvoice_hours": 2.8, - "commonvoice_locale": "tk", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tkl", - "speakers": 1285, - "language_name": "Tokelau", - "autonym": "Tokelau", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tkr", - "speakers": 16329, - "language_name": "Tsakhur", - "autonym": "Tsakhur", - "family": "Nakh-Daghestanian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tkt", - "speakers": 72787, - "language_name": "Kathoriya Tharu", - "autonym": "Kathoriya Tharu", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tly", - "speakers": 1000168, - "language_name": "Talysh", - "autonym": "Talysh", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tmh", - "speakers": 1776965, - "language_name": "Tamashek", - "autonym": "Tamashek", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tn", - "speakers": 6113428, - "language_name": "Tswana", - "autonym": "Tswana", - "family": "Atlantic-Congo", - "flores_path": "tsn_Latn", - "fleurs_tag": null, - "commonvoice_hours": 4.2, - "commonvoice_locale": "tn", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tnr", - "speakers": 3305, - "language_name": "Ménik", - "autonym": "Ménik", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "to", - "speakers": 100790, - "language_name": "Tongan", - "autonym": "Lea Fakatonga", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tog", - "speakers": 207727, - "language_name": "Nyasa Tonga", - "autonym": "Nyasa Tonga", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tpi", - "speakers": 5154217, - "language_name": "Tok Pisin", - "autonym": "Tok Pisin", - "family": "Indo-European", - "flores_path": "tpi_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null + "score": 0.17077058518804336, + "sentence_nr": 1 }, { - "bcp_47": "tr", - "speakers": 80360704, - "language_name": "Turkish", - "autonym": "Türkçe", - "family": "Turkic", - "flores_path": "tur_Latn", - "fleurs_tag": "tr_tr", - "commonvoice_hours": 128.0, - "commonvoice_locale": "tr", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "model": 1.0 - }, - { - "bcp_47": "tr", - "speakers": 80360704, - "language_name": "Turkish", - "autonym": "Türkçe", - "family": "Turkic", - "flores_path": "tur_Latn", - "fleurs_tag": "tr_tr", - "commonvoice_hours": 128.0, - "commonvoice_locale": "tr", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9446670623712353, - "model": 1.0 - }, + "score": 0.5022008374701596, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "tr", - "speakers": 80360704, - "language_name": "Turkish", - "autonym": "Türkçe", - "family": "Turkic", - "flores_path": "tur_Latn", - "fleurs_tag": "tr_tr", - "commonvoice_hours": 128.0, - "commonvoice_locale": "tr", - "in_benchmark": true, + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.2844596261435892, - "model": 1.0 - }, - { - "bcp_47": "tr", - "speakers": 80360704, - "language_name": "Turkish", - "autonym": "Türkçe", - "family": "Turkic", - "flores_path": "tur_Latn", - "fleurs_tag": "tr_tr", - "commonvoice_hours": 128.0, - "commonvoice_locale": "tr", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4525478264239361, - "model": 1.0 - }, - { - "bcp_47": "tru", - "speakers": 3035, - "language_name": "Turoyo", - "autonym": "Turoyo", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "trv", - "speakers": 4721, - "language_name": "Taroko", - "autonym": "Patas Taroko", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 10.0, - "commonvoice_locale": "trv", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "trw", - "speakers": 123756, - "language_name": "Torwali", - "autonym": "توروالی", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 19.0, - "commonvoice_locale": "trw", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ts", - "speakers": 4880932, - "language_name": "Tsonga", - "autonym": "Tsonga", - "family": "Atlantic-Congo", - "flores_path": "tso_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ts", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tsd", - "speakers": 202, - "language_name": "Tsakonian", - "autonym": "Tsakonian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tsg", - "speakers": 1200991, - "language_name": "Tausug", - "autonym": "Tausug", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tsj", - "speakers": 117348, - "language_name": "Tshangla", - "autonym": "Tshangla", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tt", - "speakers": 1984108, - "language_name": "Tatar", - "autonym": "Татар", - "family": "Turkic", - "flores_path": "tat_Cyrl", - "fleurs_tag": null, - "commonvoice_hours": 32.0, - "commonvoice_locale": "tt", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ttj", - "speakers": 821807, - "language_name": "Tooro", - "autonym": "Tooro", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tts", - "speakers": 16554576, - "language_name": "Northeastern Thai", - "autonym": "Northeastern Thai", - "family": "Tai-Kadai", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ttt", - "speakers": 22453, - "language_name": "Muslim Tat", - "autonym": "Muslim Tat", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tum", - "speakers": 1780514, - "language_name": "Tumbuka", - "autonym": "Tumbuka", - "family": "Atlantic-Congo", - "flores_path": "tum_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tvl", - "speakers": 9868, - "language_name": "Tuvalu", - "autonym": "Tuvalu", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "twq", - "speakers": 7970, - "language_name": "Tasawaq", - "autonym": "Tasawaq Senni", - "family": "Songhay", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ty", - "speakers": 91488, - "language_name": "Tahitian", - "autonym": "Tahitian", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ty", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tyv", - "speakers": 184239, - "language_name": "Tuvinian", - "autonym": "Tuvinian", - "family": "Turkic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "tyv", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "tzm", - "speakers": 3485047, - "language_name": "Central Atlas Tamazight", - "autonym": "Tamaziɣt N Laṭlaṣ", - "family": "Afro-Asiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "udm", - "speakers": 538544, - "language_name": "Udmurt", - "autonym": "Udmurt", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "udm", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ug", - "speakers": 8052967, - "language_name": "Uyghur", - "autonym": "ئۇيغۇرچە", - "family": "Turkic", - "flores_path": "uig_Arab", - "fleurs_tag": null, - "commonvoice_hours": 365.0, - "commonvoice_locale": "ug", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "uk", - "speakers": 29348975, - "language_name": "Ukrainian", - "autonym": "Українська", - "family": "Indo-European", - "flores_path": "ukr_Cyrl", - "fleurs_tag": "uk_ua", - "commonvoice_hours": 99.0, - "commonvoice_locale": "uk", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "uli", - "speakers": 2971, - "language_name": "Ulithian", - "autonym": "Ulithian", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "umb", - "speakers": 9431467, - "language_name": "Umbundu", - "autonym": "Umbundu", - "family": "Atlantic-Congo", - "flores_path": "umb_Latn", - "fleurs_tag": "umb_ao", - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "unr", - "speakers": 1252287, - "language_name": "Mundari", - "autonym": "Mundari", - "family": "Austroasiatic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "unx", - "speakers": 636523, - "language_name": "Munda", - "autonym": "Munda", - "family": "Bookkeeping", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ur", - "speakers": 290790290, - "language_name": "Urdu", - "autonym": "اردو", - "family": "Indo-European", - "flores_path": "urd_Arab", - "fleurs_tag": "ur_pk", - "commonvoice_hours": 77.0, - "commonvoice_locale": "ur", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "model": 1.0 + "score": 0.10784756064735967, + "sentence_nr": 1 }, { - "bcp_47": "ur", - "speakers": 290790290, - "language_name": "Urdu", - "autonym": "اردو", - "family": "Indo-European", - "flores_path": "urd_Arab", - "fleurs_tag": "ur_pk", - "commonvoice_hours": 77.0, - "commonvoice_locale": "ur", - "in_benchmark": true, - "task": "language_modeling", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9182703887696211, - "model": 1.0 - }, + "score": 0.4427230465401631, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "ur", - "speakers": 290790290, - "language_name": "Urdu", - "autonym": "اردو", - "family": "Indo-European", - "flores_path": "urd_Arab", - "fleurs_tag": "ur_pk", - "commonvoice_hours": 77.0, - "commonvoice_locale": "ur", - "in_benchmark": true, + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.2434522914619727, - "model": 1.0 - }, - { - "bcp_47": "ur", - "speakers": 290790290, - "language_name": "Urdu", - "autonym": "اردو", - "family": "Indo-European", - "flores_path": "urd_Arab", - "fleurs_tag": "ur_pk", - "commonvoice_hours": 77.0, - "commonvoice_locale": "ur", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.42196914378741973, - "model": 1.0 - }, - { - "bcp_47": "uz", - "speakers": 32792780, - "language_name": "Uzbek", - "autonym": "O‘Zbek", - "family": "Turkic", - "flores_path": "uzn_Latn", - "fleurs_tag": "uz_uz", - "commonvoice_hours": 100.0, - "commonvoice_locale": "uz", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "vai", - "speakers": 131906, - "language_name": "Vai", - "autonym": "ꕙꔤ", - "family": "Mande", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ve", - "speakers": 1391759, - "language_name": "Venda", - "autonym": "Tshivenḓa", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "ve", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "vec", - "speakers": 1380829, - "language_name": "Venetian", - "autonym": "Veneto", - "family": "Indo-European", - "flores_path": "vec_Latn", - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "vec", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "vep", - "speakers": 3543, - "language_name": "Veps", - "autonym": "Veps", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "vi", - "speakers": 86222962, - "language_name": "Vietnamese", - "autonym": "Tiếng Việt", - "family": "Austroasiatic", - "flores_path": "vie_Latn", - "fleurs_tag": "vi_vn", - "commonvoice_hours": 6.0, - "commonvoice_locale": "vi", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "model": 1.0 + "score": 0.06735571462439276, + "sentence_nr": 1 }, { - "bcp_47": "vi", - "speakers": 86222962, - "language_name": "Vietnamese", - "autonym": "Tiếng Việt", - "family": "Austroasiatic", - "flores_path": "vie_Latn", - "fleurs_tag": "vi_vn", - "commonvoice_hours": 6.0, - "commonvoice_locale": "vi", - "in_benchmark": true, - "task": "language_modeling", + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9580044761495575, - "model": 1.0 - }, + "score": 0.38102852892512806, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "vi", - "speakers": 86222962, - "language_name": "Vietnamese", - "autonym": "Tiếng Việt", - "family": "Austroasiatic", - "flores_path": "vie_Latn", - "fleurs_tag": "vi_vn", - "commonvoice_hours": 6.0, - "commonvoice_locale": "vi", - "in_benchmark": true, + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.25541664062319624, - "model": 1.0 - }, - { - "bcp_47": "vi", - "speakers": 86222962, - "language_name": "Vietnamese", - "autonym": "Tiếng Việt", - "family": "Austroasiatic", - "flores_path": "vie_Latn", - "fleurs_tag": "vi_vn", - "commonvoice_hours": 6.0, - "commonvoice_locale": "vi", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.43688984396841446, - "model": 1.0 - }, - { - "bcp_47": "vic", - "speakers": 3113, - "language_name": "Virgin Islands Creole English", - "autonym": "Virgin Islands Creole English", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "vls", - "speakers": 1172070, - "language_name": "West Flemish", - "autonym": "West Flemish", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "vmf", - "speakers": 4809582, - "language_name": "Main-Franconian", - "autonym": "Main-Franconian", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "vmw", - "speakers": 3912766, - "language_name": "Makhuwa", - "autonym": "Emakhuwa", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.0, - "commonvoice_locale": "vmw", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "vot", - "speakers": 0, - "language_name": "Votic", - "autonym": "Votic", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 0.1, - "commonvoice_locale": "vot", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "vro", - "speakers": 70031, - "language_name": "Võro", - "autonym": "Võro", - "family": "Uralic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "vun", - "speakers": 433291, - "language_name": "Vunjo", - "autonym": "Kyivunjo", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wa", - "speakers": 679801, - "language_name": "Walloon", - "autonym": "Walon", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wae", - "speakers": 11377, - "language_name": "Walser", - "autonym": "Walser", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wal", - "speakers": 1946034, - "language_name": "Wolaytta", - "autonym": "Wolaytta", - "family": "Ta-Ne-Omotic", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "war", - "speakers": 3166927, - "language_name": "Waray", - "autonym": "Waray", - "family": "Austronesian", - "flores_path": "war_Latn", - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wbp", - "speakers": 2496, - "language_name": "Warlpiri", - "autonym": "Warlpiri", - "family": "Pama-Nyungan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wbq", - "speakers": 2386962, - "language_name": "Waddar", - "autonym": "Waddar", - "family": "Dravidian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wbr", - "speakers": 1989135, - "language_name": "Wagdi", - "autonym": "Wagdi", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wls", - "speakers": 9512, - "language_name": "Wallisian", - "autonym": "Wallisian", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wni", - "speakers": 287736, - "language_name": "Ndzwani Comorian", - "autonym": "Ndzwani Comorian", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wo", - "speakers": 11025494, - "language_name": "Wolof", - "autonym": "Wolof", - "family": "Atlantic-Congo", - "flores_path": "wol_Latn", - "fleurs_tag": "wo_sn", - "commonvoice_hours": 0.0, - "commonvoice_locale": "wo", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wtm", - "speakers": 6100014, - "language_name": "Mewati", - "autonym": "Mewati", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "wuu", - "speakers": 83641200, - "language_name": "Wu Chinese", - "autonym": "Wu Chinese", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "xav", - "speakers": 9951, - "language_name": "Xavánte", - "autonym": "Xavánte", - "family": "Nuclear-Macro-Je", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "xh", - "speakers": 10182944, - "language_name": "Xhosa", - "autonym": "Isixhosa", - "family": "Atlantic-Congo", - "flores_path": "xho_Latn", - "fleurs_tag": "xh_za", - "commonvoice_hours": 0.0, - "commonvoice_locale": "xh", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "xmf", - "speakers": 439670, - "language_name": "Mingrelian", - "autonym": "Mingrelian", - "family": "Kartvelian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 11.0, - "commonvoice_locale": "xmf", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "xnr", - "speakers": 2121744, - "language_name": "Kangri", - "autonym": "कांगड़ी", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "xog", - "speakers": 2292409, - "language_name": "Soga", - "autonym": "Olusoga", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "xsr", - "speakers": 157705, - "language_name": "Sherpa", - "autonym": "Sherpa", - "family": "Sino-Tibetan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "yao", - "speakers": 722357, - "language_name": "Yao", - "autonym": "Yao", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "yap", - "speakers": 6556, - "language_name": "Yapese", - "autonym": "Yapese", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "yav", - "speakers": 2303, - "language_name": "Yangben", - "autonym": "Nuasue", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "ybb", - "speakers": 443920, - "language_name": "Yemba", - "autonym": "Yemba", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "yi", - "speakers": 997214, - "language_name": "Yiddish", - "autonym": "ייִדיש", - "family": "Indo-European", - "flores_path": "ydd_Hebr", - "fleurs_tag": null, - "commonvoice_hours": 0.5, - "commonvoice_locale": "yi", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "yo", - "speakers": 28685568, - "language_name": "Yoruba", - "autonym": "Èdè Yorùbá", - "family": "Atlantic-Congo", - "flores_path": "yor_Latn", - "fleurs_tag": "yo_ng", - "commonvoice_hours": 6.0, - "commonvoice_locale": "yo", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "yrl", - "speakers": 26171, - "language_name": "Nheengatu", - "autonym": "Nheẽgatu", - "family": "Tupian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "yua", - "speakers": 861955, - "language_name": "Yucateco", - "autonym": "Yucateco", - "family": "Mayan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "yue", - "speakers": 79654759, - "language_name": "Cantonese", - "autonym": "粵語", - "family": "Sino-Tibetan", - "flores_path": "yue_Hant", - "fleurs_tag": "yue_hant_hk", - "commonvoice_hours": 203.0, - "commonvoice_locale": "yue", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "model": 1.0 + "score": 0.0, + "sentence_nr": 1 }, { - "bcp_47": "yue", - "speakers": 79654759, - "language_name": "Cantonese", - "autonym": "粵語", - "family": "Sino-Tibetan", - "flores_path": "yue_Hant", - "fleurs_tag": "yue_hant_hk", - "commonvoice_hours": 203.0, - "commonvoice_locale": "yue", - "in_benchmark": true, - "task": "language_modeling", + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9022415223117748, - "model": 1.0 - }, + "score": 0.42723260976616784, + "sentence_nr": 1 + } + ], + [ { - "bcp_47": "yue", - "speakers": 79654759, - "language_name": "Cantonese", - "autonym": "粵語", - "family": "Sino-Tibetan", - "flores_path": "yue_Hant", - "fleurs_tag": "yue_hant_hk", - "commonvoice_hours": 203.0, - "commonvoice_locale": "yue", - "in_benchmark": true, + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.22628178945118504, - "model": 1.0 + "score": 0.1694466724647263, + "sentence_nr": 1 }, { - "bcp_47": "yue", - "speakers": 79654759, - "language_name": "Cantonese", - "autonym": "粵語", - "family": "Sino-Tibetan", - "flores_path": "yue_Hant", - "fleurs_tag": "yue_hant_hk", - "commonvoice_hours": 203.0, - "commonvoice_locale": "yue", - "in_benchmark": true, - "task": "translation", - "metric": "chrf", - "score": 0.4052050858435178, - "model": 1.0 - }, - { - "bcp_47": "za", - "speakers": 4321462, - "language_name": "Zhuang", - "autonym": "Vahcuengh", - "family": "Tai-Kadai", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "zag", - "speakers": 232364, - "language_name": "Zaghawa", - "autonym": "Zaghawa", - "family": "Saharan", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "zdj", - "speakers": 313124, - "language_name": "Ngazidja Comorian", - "autonym": "Ngazidja Comorian", - "family": "Atlantic-Congo", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "zea", - "speakers": 241926, - "language_name": "Zeelandic", - "autonym": "Zeelandic", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "zgh", - "speakers": 7823574, - "language_name": "Standard Moroccan Tamazight", - "autonym": "ⵜⴰⵎⴰⵣⵉⵖⵜ", - "family": "Afro-Asiatic", - "flores_path": "zgh_Tfng", - "fleurs_tag": null, - "commonvoice_hours": 1.3, - "commonvoice_locale": "zgh", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "zh", - "speakers": 1304678914, - "language_name": "Chinese", - "autonym": "中文", - "family": "Sino-Tibetan", - "flores_path": "cmn_Hans", - "fleurs_tag": "cmn_hans_cn", - "commonvoice_hours": 422.0, - "commonvoice_locale": "zh-TW", - "in_benchmark": true, - "task": "classification", - "metric": "accuracy", - "score": 0.5499999999999999, - "model": 12.0 - }, - { - "bcp_47": "zh", - "speakers": 1304678914, - "language_name": "Chinese", - "autonym": "中文", - "family": "Sino-Tibetan", - "flores_path": "cmn_Hans", - "fleurs_tag": "cmn_hans_cn", - "commonvoice_hours": 422.0, - "commonvoice_locale": "zh-TW", - "in_benchmark": true, - "task": "language_modeling", - "metric": "chrf", - "score": 0.862116462347859, - "model": 12.0 - }, + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4902502031746037, + "sentence_nr": 1 + } + ], + [ { + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "speakers": 1304678914, - "language_name": "Chinese", - "autonym": "中文", - "family": "Sino-Tibetan", - "flores_path": "cmn_Hans", - "fleurs_tag": "cmn_hans_cn", - "commonvoice_hours": 422.0, - "commonvoice_locale": "zh-TW", - "in_benchmark": true, "task": "translation", "metric": "bleu", - "score": 0.3532292543512247, - "model": 12.0 + "score": 0.0, + "sentence_nr": 1 }, { + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", - "speakers": 1304678914, - "language_name": "Chinese", - "autonym": "中文", - "family": "Sino-Tibetan", - "flores_path": "cmn_Hans", - "fleurs_tag": "cmn_hans_cn", - "commonvoice_hours": 422.0, - "commonvoice_locale": "zh-TW", - "in_benchmark": true, "task": "translation", "metric": "chrf", - "score": 0.529398790799104, - "model": 12.0 - }, - { - "bcp_47": "zmi", - "speakers": 391825, - "language_name": "Negeri Sembilan Malay", - "autonym": "Negeri Sembilan Malay", - "family": "Austronesian", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": null, - "commonvoice_locale": null, - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "zu", - "speakers": 13973830, - "language_name": "Zulu", - "autonym": "Isizulu", - "family": "Atlantic-Congo", - "flores_path": "zul_Latn", - "fleurs_tag": "zu_za", - "commonvoice_hours": 0.0, - "commonvoice_locale": "zu", - "in_benchmark": true, - "task": null, - "metric": null, - "score": null, - "model": null - }, - { - "bcp_47": "zza", - "speakers": 1148245, - "language_name": "Zaza", - "autonym": "Zaza", - "family": "Indo-European", - "flores_path": null, - "fleurs_tag": null, - "commonvoice_hours": 1.8, - "commonvoice_locale": "zza", - "in_benchmark": false, - "task": null, - "metric": null, - "score": null, - "model": null + "score": 0.3532931581623198, + "sentence_nr": 1 } ], - "scores": [ + [ { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.175396614619324, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9596433057062642, - "sentence_nr": 14.5 - }, + "score": 0.49736499605529066, + "sentence_nr": 1 + } + ], + [ { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.4491277841667736, - "sentence_nr": 14.5 + "score": 0.15154395847232716, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5740458676508566, - "sentence_nr": 14.5 - }, + "score": 0.46053919348995803, + "sentence_nr": 1 + } + ], + [ { - "model": "amazon/nova-micro-v1", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "sentence_nr": 14.5 + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.9120424742302929, - "sentence_nr": 14.5 - }, + "score": 0.4041678259311437, + "sentence_nr": 1 + } + ], + [ { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.3517140864634192, - "sentence_nr": 14.5 + "score": 0.1290514243115152, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5543825716892707, - "sentence_nr": 14.5 - }, + "score": 0.4766581477336301, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.8666666666666667, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.08273178236238297, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", "metric": "chrf", - "score": 0.9854618933889567, - "sentence_nr": 14.5 - }, + "score": 0.36399666460809255, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.4939697152970565, - "sentence_nr": 14.5 + "score": 0.12601482779921785, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.6050713247311065, - "sentence_nr": 14.5 - }, + "score": 0.43595665254608706, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.8666666666666667, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.40959087443621306, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", "metric": "chrf", - "score": 0.93160311345022, - "sentence_nr": 14.5 - }, + "score": 0.6348509381122925, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.41074275566059204, - "sentence_nr": 14.5 + "score": 0.08214106568089705, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.5606266861920302, - "sentence_nr": 14.5 - }, + "score": 0.3969463877642616, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.7333333333333333, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0744904632040495, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", "metric": "chrf", - "score": 0.9840731165453181, - "sentence_nr": 14.5 - }, + "score": 0.4111163205685468, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.37911136698810943, - "sentence_nr": 14.5 + "score": 0.12894104034845807, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.5094402087357145, - "sentence_nr": 14.5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.7333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9308727688438002, - "sentence_nr": 14.5 - }, + "score": 0.4486368934849452, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.4225918163141283, - "sentence_nr": 14.5 + "score": 0.10070927557742705, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.5751241490536672, - "sentence_nr": 14.5 - }, + "score": 0.43718220262892105, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.7, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0772718393063023, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", "metric": "chrf", - "score": 0.9705999747171945, - "sentence_nr": 14.5 - }, + "score": 0.4203683137304257, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.4234349336981381, - "sentence_nr": 14.5 + "score": 0.0756907193511249, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.5544956505144546, - "sentence_nr": 14.5 - }, + "score": 0.4138725093679467, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.7333333333333333, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21748353646757182, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", "metric": "chrf", - "score": 0.9098214011017126, - "sentence_nr": 14.5 - }, + "score": 0.4462746462826943, + "sentence_nr": 1 + } + ], + [ { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.32628982579803617, - "sentence_nr": 14.5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.520771580386218, - "sentence_nr": 14.5 - }, + "score": 0.4179644538349004, + "sentence_nr": 1 + } + ], + [ { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.8333333333333334, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.10505106462290037, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", "metric": "chrf", - "score": 0.9802918604410554, - "sentence_nr": 14.5 - }, + "score": 0.4474870048911137, + "sentence_nr": 1 + } + ], + [ { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.18722412351358647, - "sentence_nr": 14.5 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.34151371128305424, - "sentence_nr": 14.5 - }, + "score": 0.0009218289085545725, + "sentence_nr": 1 + } + ], + [ { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.7666666666666667, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.15653859793617866, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", "metric": "chrf", - "score": 0.9309062045243536, - "sentence_nr": 14.5 - }, + "score": 0.43177798053127925, + "sentence_nr": 1 + } + ], + [ { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.3157439141709964, - "sentence_nr": 14.5 + "score": 0.0891537192318598, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.5156364087365835, - "sentence_nr": 14.5 - }, + "score": 0.3970634926176537, + "sentence_nr": 1 + } + ], + [ { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.7, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0950136506275681, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", "metric": "chrf", - "score": 0.9683113542489836, - "sentence_nr": 14.5 - }, + "score": 0.4372017487229785, + "sentence_nr": 1 + } + ], + [ { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.444507882217401, - "sentence_nr": 14.5 + "score": 0.1259356760989446, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.5485685299214524, - "sentence_nr": 14.5 - }, + "score": 0.44568274520971096, + "sentence_nr": 1 + } + ], + [ { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.16322494183480127, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", "metric": "chrf", - "score": 0.9233418215760759, - "sentence_nr": 14.5 - }, + "score": 0.4815584993817062, + "sentence_nr": 1 + } + ], + [ { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.4192089568216648, - "sentence_nr": 14.5 + "score": 0.0904087252785689, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.5873498820054043, - "sentence_nr": 14.5 - }, + "score": 0.41830513174690515, + "sentence_nr": 1 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.21351902664706998, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", + "bcp_47": "tr", + "task": "translation", "metric": "chrf", - "score": 0.9392314289764625, - "sentence_nr": 14.5 - }, + "score": 0.5130443042033361, + "sentence_nr": 1 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.2837250166554738, - "sentence_nr": 14.5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.4684314458952127, - "sentence_nr": 14.5 - }, + "score": 0.16269986423611488, + "sentence_nr": 1 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "sentence_nr": 14.5 + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", + "bcp_47": "ko", + "task": "translation", "metric": "chrf", - "score": 0.930329195667362, - "sentence_nr": 14.5 - }, + "score": 0.06939838145153245, + "sentence_nr": 1 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.19793415292805128, - "sentence_nr": 14.5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.3881278724939126, - "sentence_nr": 14.5 - }, + "score": 0.3371547585108182, + "sentence_nr": 1 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "sentence_nr": 14.5 + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.1691386174483793, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", + "bcp_47": "fil", + "task": "translation", "metric": "chrf", - "score": 0.9097658392566466, - "sentence_nr": 14.5 - }, + "score": 0.4920789340026317, + "sentence_nr": 1 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.20360260890869705, - "sentence_nr": 14.5 + "score": 0.14944432524273302, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.4076175886917154, - "sentence_nr": 14.5 - }, + "score": 0.4972796478830659, + "sentence_nr": 1 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0.6666666666666666, - "sentence_nr": 14.5 + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.09793316925795417, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", + "bcp_47": "gu", + "task": "translation", "metric": "chrf", - "score": 0.9594656177914042, - "sentence_nr": 14.5 - }, + "score": 0.4297577431879659, + "sentence_nr": 1 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.3207642359472324, - "sentence_nr": 14.5 + "score": 0.38870674200492367, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.49973270743869647, - "sentence_nr": 14.5 - }, + "score": 0.6484380084879691, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "sentence_nr": 14.5 + "task": "translation", + "metric": "bleu", + "score": 0.4923751299732868, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.9810108607325073, - "sentence_nr": 14.5 - }, + "score": 0.6853756490381199, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.4082092315494641, - "sentence_nr": 14.5 + "score": 0.3996712647649035, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5358317196308513, - "sentence_nr": 14.5 - }, + "score": 0.6353525755760105, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5115346945020283, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9569342865902168, - "sentence_nr": 14.5 - }, + "score": 0.7037574715738644, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.283646935447629, - "sentence_nr": 14.5 + "score": 0.017834618169115152, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.46056393670415496, - "sentence_nr": 14.5 - }, + "score": 0.05927156798818119, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "sentence_nr": 14.5 + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.23904922011090457, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9414698824984596, - "sentence_nr": 14.5 - }, + "score": 0.3399292774084129, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.26232478733341374, - "sentence_nr": 14.5 + "score": 0.6152980280400979, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.44641220608314985, - "sentence_nr": 14.5 - }, + "score": 0.8311281590297233, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.005449161724399305, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9402707475255596, - "sentence_nr": 14.5 - }, + "score": 0.026158029267484995, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.29097818784870333, - "sentence_nr": 14.5 + "score": 0.24508104771894088, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.44978114149245985, - "sentence_nr": 14.5 - }, + "score": 0.5725552336126134, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9815706066541411, - "sentence_nr": 14.5 - }, + "score": 0.0, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.3193235920661593, - "sentence_nr": 14.5 + "score": 0.20801258614305904, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.4875691290722964, - "sentence_nr": 14.5 - }, + "score": 0.26703508536995574, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.35315040956049437, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.938086045460355, - "sentence_nr": 14.5 - }, + "score": 0.625895188503691, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.26442484966880464, - "sentence_nr": 14.5 + "score": 0.11133996756497437, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.44452182973195975, - "sentence_nr": 14.5 - }, + "score": 0.4410280353998367, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17743299460161885, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9428185393832219, - "sentence_nr": 14.5 - }, + "score": 0.43071271897416463, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.31956422674397006, - "sentence_nr": 14.5 + "score": 0.16052654068024738, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.4896277852320754, - "sentence_nr": 14.5 - }, + "score": 0.41580120868053494, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.05963579607071745, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9598314474300775, - "sentence_nr": 14.5 - }, + "score": 0.31139762378406344, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.29354203311552335, - "sentence_nr": 14.5 + "score": 0.006734847287559362, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.4723493656022861, - "sentence_nr": 14.5 - }, + "score": 0.03408121951468736, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0.6666666666666666, - "sentence_nr": 14.5 + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.09880177230676102, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9439490413212892, - "sentence_nr": 14.5 - }, + "score": 0.3297638349619511, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.28142583904826096, - "sentence_nr": 14.5 + "score": 0.2377604053257556, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.4707591889357925, - "sentence_nr": 14.5 - }, + "score": 0.5662768009060447, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "sentence_nr": 14.5 + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.22573408807826306, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9371132855221468, - "sentence_nr": 14.5 - }, + "score": 0.5444672928195973, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.2723410893967824, - "sentence_nr": 14.5 + "score": 0.10742716472890976, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.43929453749366865, - "sentence_nr": 14.5 - }, + "score": 0.42694859148910824, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9298143194922116, - "sentence_nr": 14.5 - }, + "score": 0.0, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.23729186537968905, - "sentence_nr": 14.5 + "score": 0.0, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.4198940727847352, - "sentence_nr": 14.5 - }, + "score": 0.0, + "sentence_nr": 2 + } + ], + [ { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14745870033404418, + "sentence_nr": 2 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.9355445912073929, - "sentence_nr": 14.5 - }, + "score": 0.475170637938921, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.20332074778330964, - "sentence_nr": 14.5 + "score": 0.21665407194210906, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.4005255477730261, - "sentence_nr": 14.5 - }, + "score": 0.4344921442639243, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "sentence_nr": 14.5 + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.37994652561206577, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", + "bcp_47": "es", + "task": "translation", "metric": "chrf", - "score": 0.9440892312053646, - "sentence_nr": 14.5 - }, + "score": 0.6464467277069994, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.24903049799655144, - "sentence_nr": 14.5 + "score": 0.09362261118571368, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.42489125861884175, - "sentence_nr": 14.5 - }, + "score": 0.3452056942265759, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "sentence_nr": 14.5 + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.18917620656425485, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", + "bcp_47": "ur", + "task": "translation", "metric": "chrf", - "score": 0.9025298113664532, - "sentence_nr": 14.5 - }, + "score": 0.4346170232980484, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.3368333727390049, - "sentence_nr": 14.5 + "score": 0.420450507904553, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.4873541452250132, - "sentence_nr": 14.5 - }, + "score": 0.6503146347305717, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "sentence_nr": 14.5 + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.24894072982768842, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", + "bcp_47": "bn", + "task": "translation", "metric": "chrf", - "score": 0.9641423549595803, - "sentence_nr": 14.5 - }, + "score": 0.5212235893093335, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.3110105331834714, - "sentence_nr": 14.5 + "score": 0.393613605227227, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.49172080600981716, - "sentence_nr": 14.5 - }, + "score": 0.6492198447661237, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "sentence_nr": 14.5 + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.21147734744561483, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", + "bcp_47": "pa", + "task": "translation", "metric": "chrf", - "score": 0.9663696817874857, - "sentence_nr": 14.5 - }, + "score": 0.41020178654369294, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.26015179309825326, - "sentence_nr": 14.5 + "score": 0.2329856851831642, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.4525762744858351, - "sentence_nr": 14.5 - }, + "score": 0.5405751250637106, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "sentence_nr": 14.5 + "task": "translation", + "metric": "bleu", + "score": 0.41756686236967944, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.9401745263817055, - "sentence_nr": 14.5 - }, + "score": 0.5616829345739638, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.30022614000139736, - "sentence_nr": 14.5 + "score": 0.38189567401226293, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.46620999112028233, - "sentence_nr": 14.5 - }, + "score": 0.6154314825900052, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2126707920684064, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", + "bcp_47": "de", + "task": "translation", "metric": "chrf", - "score": 0.9641464401452432, - "sentence_nr": 14.5 - }, + "score": 0.4659908460634765, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.2528104486727614, - "sentence_nr": 14.5 + "score": 0.23240102389974368, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.42077045938882934, - "sentence_nr": 14.5 - }, + "score": 0.4973274282641141, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0.36666666666666664, - "sentence_nr": 14.5 + "task": "translation", + "metric": "bleu", + "score": 0.17979384730979156, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.9513838296654142, - "sentence_nr": 14.5 - }, + "score": 0.4177311931467539, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.28338449781147135, - "sentence_nr": 14.5 + "score": 0.1702602472176709, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.467003606031036, - "sentence_nr": 14.5 - }, + "score": 0.4366640707779677, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.933651069586263, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", + "bcp_47": "jv", + "task": "translation", "metric": "chrf", - "score": 0.9446670623712353, - "sentence_nr": 14.5 - }, + "score": 0.9586507529693243, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.2844596261435892, - "sentence_nr": 14.5 + "score": 0.3816408219023713, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.4525478264239361, - "sentence_nr": 14.5 + "score": 0.5784105768028126, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.18398226639192106, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0.5, - "sentence_nr": 14.5 + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.37285010531146734, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.26958884543190903, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", + "bcp_47": "fa", + "task": "translation", "metric": "chrf", - "score": 0.9182703887696211, - "sentence_nr": 14.5 - }, + "score": 0.5631664732610485, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.2434522914619727, - "sentence_nr": 14.5 + "score": 0.4005296397635166, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.42196914378741973, - "sentence_nr": 14.5 - }, + "score": 0.6201785376974677, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0.6, - "sentence_nr": 14.5 + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.15956483578595942, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", + "bcp_47": "yue", + "task": "translation", "metric": "chrf", - "score": 0.9580044761495575, - "sentence_nr": 14.5 - }, + "score": 0.425693420655628, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.25541664062319624, - "sentence_nr": 14.5 + "score": 0.2323385180696658, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.43688984396841446, - "sentence_nr": 14.5 - }, + "score": 0.5019509292309764, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.22952177306405494, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", + "bcp_47": "it", + "task": "translation", "metric": "chrf", - "score": 0.9022415223117748, - "sentence_nr": 14.5 - }, + "score": 0.5279520952576137, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.22628178945118504, - "sentence_nr": 14.5 + "score": 0.3618488169166299, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.4052050858435178, - "sentence_nr": 14.5 - }, + "score": 0.5708179622131996, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1712766252338756, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", + "bcp_47": "arz", + "task": "translation", "metric": "chrf", - "score": 0.9003842558834025, - "sentence_nr": 14.5 - }, + "score": 0.5225554962608486, + "sentence_nr": 2 + } + ], + [ { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.407349841474714, - "sentence_nr": 14.5 + "score": 0.2709079038456153, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.5862284100611604, - "sentence_nr": 14.5 - }, + "score": 0.447458019441992, + "sentence_nr": 2 + } + ], + [ { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.43333333333333335, - "sentence_nr": 14.5 + "task": "translation", + "metric": "bleu", + "score": 0.38249626297768063, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.9312514772455793, - "sentence_nr": 14.5 - }, + "score": 0.40976234193505356, + "sentence_nr": 3 + } + ], + [ { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.34049537977839345, - "sentence_nr": 14.5 + "score": 0.5806197937310393, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.4566714452688056, - "sentence_nr": 14.5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.4, - "sentence_nr": 14.5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.831019102021034, - "sentence_nr": 14.5 - }, + "score": 0.7346706700987636, + "sentence_nr": 3 + } + ], + [ { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.2941657600332359, - "sentence_nr": 14.5 + "score": 0.5793367580502561, + "sentence_nr": 3 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.4559600841124037, - "sentence_nr": 14.5 - }, + "score": 0.6502428441722727, + "sentence_nr": 3 + } + ], + [ { - "model": "mistralai/mistral-nemo", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 + "task": "translation", + "metric": "bleu", + "score": 0.4855332614117322, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-nemo", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.9126664788853867, - "sentence_nr": 14.5 - }, + "score": 0.5299556742893647, + "sentence_nr": 3 + } + ], + [ { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.3057719571177098, - "sentence_nr": 14.5 + "score": 0.19940445989088915, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.45969934521843914, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.5333333333333333, - "sentence_nr": 14.5 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8098290899553928, - "sentence_nr": 14.5 - }, + "score": 0.43164821827950184, + "sentence_nr": 3 + } + ], + [ { "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.32971687049116577, - "sentence_nr": 14.5 + "score": 0.2423441824135159, + "sentence_nr": 3 }, { "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5266852291276966, - "sentence_nr": 14.5 - }, + "score": 0.4429509373913047, + "sentence_nr": 3 + } + ], + [ { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "google/gemini-2.0-flash-001", "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "google/gemini-2.0-flash-001", "bcp_47": "en", - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.8397325387031247, - "sentence_nr": 14.5 - }, + "score": 0.7056438934239434, + "sentence_nr": 3 + } + ], + [ { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.39829687791881524, - "sentence_nr": 14.5 + "score": 0.6064630666233242, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5018233196301349, - "sentence_nr": 14.5 - }, + "score": 0.6752055521830945, + "sentence_nr": 3 + } + ], + [ { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.6333333333333333, - "sentence_nr": 14.5 + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5357110024227318, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", "metric": "chrf", - "score": 0.9167475699419941, - "sentence_nr": 14.5 - }, + "score": 0.6365941772753647, + "sentence_nr": 3 + } + ], + [ { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.3584454346550059, - "sentence_nr": 14.5 + "score": 0, + "sentence_nr": 3 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5407815810671934, - "sentence_nr": 14.5 - }, + "score": 0.0, + "sentence_nr": 3 + } + ], + [ { - "model": "openai/gpt-4o-mini", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.5666666666666667, - "sentence_nr": 14.5 + "task": "translation", + "metric": "bleu", + "score": 0.14790264259417688, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.9807297060840695, - "sentence_nr": 14.5 - }, + "score": 0.27159767590045303, + "sentence_nr": 3 + } + ], + [ { - "model": "openai/gpt-4o-mini", + "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.41353083493324033, - "sentence_nr": 14.5 + "score": 0.4751132438608344, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5310916105215437, - "sentence_nr": 14.5 - }, + "score": 0.6849386986272349, + "sentence_nr": 3 + } + ], + [ { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.4666666666666667, - "sentence_nr": 14.5 + "task": "translation", + "metric": "bleu", + "score": 0.08635800047213174, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "language_modeling", + "task": "translation", "metric": "chrf", - "score": 0.9313441068235117, - "sentence_nr": 14.5 - }, + "score": 0.218109371254876, + "sentence_nr": 3 + } + ], + [ { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.37449321023356824, - "sentence_nr": 14.5 + "score": 0.23386786214190372, + "sentence_nr": 3 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.559410465345808, - "sentence_nr": 14.5 - }, + "score": 0.3682311523733465, + "sentence_nr": 3 + } + ], + [ { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0.0, - "sentence_nr": 14.5 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11739521786077453, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.5225508930892153, - "sentence_nr": 14.5 + "score": 0.22090491782919655, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.280413108453108, + "sentence_nr": 3 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.20068036705764214, - "sentence_nr": 14.5 + "score": 0.11547518641061649, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.23884729813422853, - "sentence_nr": 14.5 + "score": 0.25945846414490087, + "sentence_nr": 3 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20233074088759792, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "mistralai/mistral-nemo", "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0.0, - "sentence_nr": 14.5 + "task": "translation", + "metric": "chrf", + "score": 0.3746629492952356, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.40214612768560637, + "sentence_nr": 3 }, { - "model": "qwen/qwq-32b", + "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", - "task": "language_modeling", + "task": "translation", + "metric": "chrf", + "score": 0.45128424593135114, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.37284875432797243, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", "metric": "chrf", - "score": 0.4174856398225174, - "sentence_nr": 14.5 + "score": 0.44888401040760956, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0925329498915617, + "sentence_nr": 3 }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2110486160692096, + "sentence_nr": 3 + } + ], + [ { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.22828858009816946, - "sentence_nr": 14.5 + "score": 0, + "sentence_nr": 3 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.3698284418118128, - "sentence_nr": 14.5 + "score": 0.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.12453389344594705, + "sentence_nr": 3 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.141543757252386, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2594145364221844, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6244631487487835, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6931369519059803, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.25383339228798274, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.45896379476820603, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.17200767571780612, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3723150838362789, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1477219991186121, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.28685201698226354, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3254455687469726, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4474512036484817, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.23887527917609022, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4120359948636439, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.728208634600343, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3556521383601747, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.594830811413066, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21629114799587432, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3542320138389837, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.728208634600343, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.27405612859390877, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4639958592456083, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.13004800471424346, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.28217142159025543, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.37821486365532614, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4718665834023439, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3699382260470039, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4032851361478274, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.45167594566243024, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5169677927619225, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.23386786214190372, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3780009826926042, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3925121365052661, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.47788592802001717, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.1423412184218882, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2596718628394258, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3572188192648703, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.45381175288762937, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.07425055521504613, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.18122341046764998, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1978585723043446, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3527599187160617, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2523019529343173, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4406369072888057, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.41072675483179805, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5635589150380774, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3883375900135818, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4643731845106876, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7123666275414222, + "sentence_nr": 3 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2246029757863831, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5773502691896258, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7999099314029202, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6417603075499863, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7825422900366437, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8503171627677965, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.37709297891717664, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6881502501430368, + "sentence_nr": 4 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5581982021478125, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.652013511062815, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5881561248602009, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40435987083533204, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.39858613265631837, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4425973012069069, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.47160616105623426, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5309982646782259, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6151179643430991, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.41238100267720657, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.39909989628767284, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.17181529671327242, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5293474685884572, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4429196299668147, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5802683403568892, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3237722713145643, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7426638026175545, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.49342175914364256, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4352628824108997, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5116862201536014, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.33471616336068044, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2865612242047131, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6433813179203622, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3598792258309727, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5125809225356253, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5539920925426138, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5226572946586268, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5073395824633415, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.29382595610734974, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5773664661124461, + "sentence_nr": 4 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7013062757071812, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9303769449292738, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2381658499765768, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9063898435384111, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5309354663044072, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6756014232714684, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4529852871970908, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6941474239078328, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.9457416090031758, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9892952933418456, + "sentence_nr": 5 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4234885228074744, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7410180114887145, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7543919667018285, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5738396574789242, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.798357133373606, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5738396574789242, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.798357133373606, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4234885228074744, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7410180114887145, + "sentence_nr": 5 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7543919667018285, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7829685247145245, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.40276720463657734, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6529271690805427, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.30188353873287377, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6086565367747951, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.6026286934891149, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.8025775976044891, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3272712268138726, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6272846474183881, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2981792160679168, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5788026000794341, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.7012294787544179, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8478115719875968, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.21690365808279138, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5384773678665918, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.25711386542134795, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6088853751738869, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5695988432761473, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7516103467926585, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7361065921505279, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.38091370416670794, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6438225861756911, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7202697992734389, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.5309354663044072, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6990707992725005, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4506667273103674, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3416581331218724, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6578570934289981, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4797543511401896, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7240781310560407, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.6401876410870359, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.7526484951226097, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33057129676705455, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5669225664686625, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.30350690419450826, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.569133886912883, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6834516951654327, + "sentence_nr": 5 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3263040636562357, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5838790966762375, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3142665434344143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6466526067220029, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3751840463233443, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6279894552667558, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.551397074868541, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17470942957770763, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5403400891349619, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5763410052067085, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.37392149096896676, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6665214662145853, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5838790966762375, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5763410052067085, + "sentence_nr": 6 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5460240376042262, + "sentence_nr": 6 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.24343304284910333, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6275577931282961, + "sentence_nr": 6 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.28571962561926445, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6431872581462166, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6255340042200862, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.8724783049357475, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4547900039222725, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6541971428810075, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7122562458056777, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6824395076981005, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.38305978177479755, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6061131723054572, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7289444696770301, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3684981984538114, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5606332518476288, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4536404448264584, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8020827133708689, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.4545091839935173, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.7166050399790445, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3370129264673147, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.7096874943799061, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1624355752882384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4952968469712617, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7807505267551733, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5595205105615875, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.8322210048001876, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3142665434344143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6466526067220029, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3610544299180199, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.49125115898082056, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8578928092681435, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9422733087334002, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.42818224355402373, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.42105372680687736, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7001171094008295, + "sentence_nr": 6 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1582866049832572, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.34487142413575794, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15521606028436608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.37645329404497957, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12620429887108936, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.35580703793872603, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12872220631084524, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.33602633953270183, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.042121062429802174, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.14281404499176092, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.042575418285137674, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05173688961049459, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3045613775157565, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5275070803493389, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2734283774929853, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5252214120598302, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.10203846572325131, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.33381153680096753, + "sentence_nr": 7 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.014935758919429663, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.08106107745254391, + "sentence_nr": 7 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.044304867337633724, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20806974344498103, + "sentence_nr": 7 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.08860973467526746, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3178004360288637, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.15268019045355535, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41028757620299977, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.030860166165309233, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1100250143829584, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21255327712152144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.43272151570555034, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.01486609147288197, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.13893773605583024, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.06609667473412645, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.26197209338359717, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.26064517697298795, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5092206110218525, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1507980395794452, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4306039128585424, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1438459189500836, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30693371625402605, + "sentence_nr": 7 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0979038733644086, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30211704738953993, + "sentence_nr": 7 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.009624974244068071, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.07318255686027669, + "sentence_nr": 7 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.043420474648595074, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2884095690753619, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.13868172938464635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3094469764260441, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.11091252683001185, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.26607634610445896, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.18154954789336694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4557483776072868, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1381751568911733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3121557499162649, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.20065115069964384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4084885616013531, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.12291219097556666, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3448002180666873, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.20608572305725564, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.4704943905570542, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.085416483900781, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.2825804066750608, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11452508920842025, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3212742401272785, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15478222669012726, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3550584759508654, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.07875433150726119, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2638954513805452, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.10734088848154077, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.33946796348247366, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.17795920517030017, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.41862955401967455, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.19388048412249795, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.44361702376789247, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1237012344369667, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.33331866832253354, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.15589802574348086, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.37894206802233305, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1948502778967486, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.35525815981538433, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1618333627385132, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3458746996740858, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.17393111207515277, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.39042812195808824, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.19064689695123957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.36954921822756504, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.1785851272602057, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3800733399524004, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.20113943179758872, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5054929215592371, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.07088281524771703, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.1725752257112697, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11901413329120636, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2908877283991857, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.15593857496482408, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3832822126692406, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.21107720643690867, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.43911506176829573, + "sentence_nr": 7 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13410301071131794, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3942932268034351, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.30677064886592076, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5308555945242818, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1327526847508867, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.37850602486495205, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18405035438430847, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4142901090120915, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.061826017721563604, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.08852681798207009, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3583179111355935, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3857436691295343, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5750224388123065, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5920893212447781, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6925021521158101, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.22478613858269392, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.44348101018104913, + "sentence_nr": 8 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.183687049781416, + "sentence_nr": 8 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.351911486970854, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5181825846579515, + "sentence_nr": 8 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17328174803055044, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3178268797869574, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.286608441075188, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4579283646292802, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25861130592298187, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.39452644092432093, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20379250618355427, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41085414309816914, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.310679343206099, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4471183729584148, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2334787866969297, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3621517589760531, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5866873582151947, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.46269559069048716, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.46872641361415845, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10434360980785336, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3012789660952507, + "sentence_nr": 8 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.13835317113453516, + "sentence_nr": 8 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16343842313572918, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3986641525285075, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.1690979933029136, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3751861276375209, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2840563956846642, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5110250591004448, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.19920413481788912, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.42537796926163113, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.20401796878756984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.43317630453631556, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2044887070217883, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.38471585132587544, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2980504190448601, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5101268920225042, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.18831933500600306, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4318025704181776, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21544027588567594, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5040038440508637, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.26970223719007375, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5172978597562362, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.30630098078522544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5439056051092116, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.19850842371858787, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.43584341835040474, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.20170335119323748, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3541251997977811, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.10508106635796587, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3182774828667731, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2309552734743087, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.43975656978777905, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.23530033724858213, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.46208607300298377, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.37284027455688556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5528347504734102, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2887308472548599, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.38846174119508314, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.15487293534817623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.39293494862736383, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.21741853044139284, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3535910166292039, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33626819961829335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5466581859383387, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.32000331642122953, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5480591855923784, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.21132630077912357, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4175670766052166, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.13108369255325433, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3929302741911199, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.20174045447955946, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.33729298835089516, + "sentence_nr": 8 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20972571494011877, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.395894071208527, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.35369375385786006, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13087682931309413, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19462952976787054, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.013538497707846785, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1570208067577934, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4113045280468524, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15082713742973322, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3965911699770542, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15471428129658016, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4580211317461481, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18928475425929295, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4916060435820526, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.21940429389247643, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4343280866601455, + "sentence_nr": 9 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1824401863423467, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36709433185688595, + "sentence_nr": 9 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3377854698776805, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.521201229892482, + "sentence_nr": 9 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12475846123062707, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.27823340731817514, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10759927692349745, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21065794536310511, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.07843772989359644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1324578891826276, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.08163977068875294, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.09047502044256338, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21669141850731985, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10322985794794913, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.24491122482530842, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11809057094812304, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.27930342777387007, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21268444697113978, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3229997133764549, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1475503033983142, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22104108935973044, + "sentence_nr": 9 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16434349396840395, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28582614857210975, + "sentence_nr": 9 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10085167559661873, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.23831215045289575, + "sentence_nr": 9 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17543744527808774, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28201016956553354, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.12274092982883021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3385513651938691, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.31017716089889963, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.21688283061839067, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.41775824162589076, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3228288840559658, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.18237599479708327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.3740403511567824, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.12876689524369925, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3253153379449275, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.119159749312327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.21297942664093145, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2036348471340078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3472831655579266, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.24362353508932386, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.28135849152758385, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.14482189302397735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2913876815877049, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.16306957103469613, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.28112283847231073, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1308613527030366, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3063146286877558, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.12787395553510186, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.21931515993565381, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.1441966459257424, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.14957316612525498, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.27675048474641756, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3780460244391623, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.12503614625842938, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20624064341134082, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3368893372278425, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.2961559727627133, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.12846497020051437, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2670865602673704, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.28252374116432993, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3549531183419122, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26128489301072644, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2126837065505244, + "sentence_nr": 9 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.07149097424598219, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7774075575820374, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8943538262827356, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18639667871924825, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4540232715517938, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8431643718744966, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9341410275694613, + "sentence_nr": 10 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47095916883357913, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.701526330557871, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.38260294162784475, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6692418584049541, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4093629115744712, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6243156092220487, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.36703839483583006, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6725357332891145, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4322450379367835, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.697398762810304, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41122010762096617, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6697492221087861, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41126318495820946, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7254294465493162, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4682601513034942, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.691130012325589, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.35334199245807973, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6858610070406853, + "sentence_nr": 10 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.33061666631099795, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5343307680770133, + "sentence_nr": 10 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.38981415389445495, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.665622189515994, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3223937524276847, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6719135382778884, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.4466645979681496, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.714247354760266, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.6233091888805312, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7757111039890131, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.33414322499224436, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7159580680193959, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.6620694102966999, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7893416551805176, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.26540383860058264, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.51610805930355, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.49335830881778164, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7240615166053675, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.4024279293206815, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6798070651801875, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.6153147385756811, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.8160952378322835, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.6838493012537611, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.8178509424142287, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5169198985488462, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7879691803533485, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5223010192696725, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7442134884509299, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3885151883045163, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6763151870864087, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5985488590218004, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.8248561222494313, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.37163791993879014, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6792432753943116, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.5152630372775983, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.7696821316655393, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.43521980294891405, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.7204319998551938, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.46417187236805535, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6653227698984816, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.519124054532681, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7733428788002137, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5083170211670072, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.755952798269267, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3161432307247198, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5990810117425377, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.40980949787910764, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.7145653936496129, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.5770135999436572, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697316849447288, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.7030214416074754, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8357829168322639, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.49199339399396913, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.713934780293142, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.5002824356846001, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7029341279811726, + "sentence_nr": 10 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.029124970213905314, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1779610499753793, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05989397907532586, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.13539167567510446, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.03073685498855941, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.08933758530290428, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21051269871304829, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.18854722085547196, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1387123733773652, + "sentence_nr": 11 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05499461839884487, + "sentence_nr": 11 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19978068293555115, + "sentence_nr": 11 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1388011701223677, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1460389336009171, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.038796252164058714, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1756002877791377, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0029868578255675027, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.015380253532528225, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.19065171436703615, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21083781655774478, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.14590438247348272, + "sentence_nr": 11 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.04379419293412465, + "sentence_nr": 11 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.15119622228734425, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.21315318926996712, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.16991425356152365, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.22371589981083434, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.20982178138488494, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.20189358781069322, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.20261685251676126, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.226729844497646, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.18184342512086546, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.2185121523322681, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.17386106914161167, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.08272059515141832, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.1814025725787457, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.23945930551153607, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.20815933215961574, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.09886053260067004, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.14345644530149382, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.19097844728039898, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.08246021416977749, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.18868639139421345, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20665565461558383, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.17764901410543646, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.19312651305380893, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.21371557282714232, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.18854043679878274, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.19559831357902827, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.1914895496057553, + "sentence_nr": 11 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6666935927206881, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7957561291403441, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.34999116613463505, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6356075517191035, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.48649824146709, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6763447333054696, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.367622917844187, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5615050712672139, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4081538556642202, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.46386216052527535, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4300174433641992, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5099800158255156, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7963205130973803, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8101688749569373, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6570128212612868, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6262090565616182, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5866943184579982, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6390393619950272, + "sentence_nr": 12 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 12 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.01047222192173988, + "sentence_nr": 12 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5683565265173782, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7072367582469653, + "sentence_nr": 12 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20287366424876002, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5368464080033196, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5198707241967666, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6993305416237223, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.36603776814499195, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.45532918164901276, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.13525036115537795, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3120848453730729, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3474347870952493, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7073395735740273, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6577952971578602, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6570128212612868, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6221526807313811, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5460462259563637, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6641829079106271, + "sentence_nr": 12 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.04884431803904408, + "sentence_nr": 12 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.18357384275951122, + "sentence_nr": 12 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.28073304156067924, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.360657984953223, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.46365764298816153, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5757521453586436, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.3147715014841853, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5986154863155839, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.3885646234110734, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5051669760132699, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.464413403675355, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6291656356697347, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.30490938758882236, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.579088460457721, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3758073513458154, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5302950018189692, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.29308025637967977, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5715200997140051, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.43285599641891276, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5551678521355665, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.25984882476296983, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6305744214119023, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.48649824146709, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7255446918266525, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.464413403675355, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6853183317800515, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.34999116613463505, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6356075517191035, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.4426623526629488, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.612058732370435, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5522004843736675, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6166558670381421, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.37954187220913477, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5550325994532472, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.3147715014841853, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.521228891025682, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3964513253420688, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6095420129111676, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.36033217429111203, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5550014071110869, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.33403925633579773, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5753930328058733, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44882520213790794, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5856175239899348, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.42760828727369016, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6065010489098535, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33403925633579773, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5915394296427854, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3212785834179169, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6158121620368939, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.1751489536280261, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.378593296276962, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3214110553053944, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.49232390716994445, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.479033905070678, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5975149526416976, + "sentence_nr": 12 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13150403915662862, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21177549089429396, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1424915360855107, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.23985076149753726, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13309638637723345, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.18696197122203645, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12256515595630638, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.23303109995893123, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1419886619859991, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.24113733359485448, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1324448705928064, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.22863839042697148, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12017886776600228, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20794486026487116, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1164257728844972, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19249901344360867, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12325384013681445, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1960232617116645, + "sentence_nr": 13 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12806473847444227, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20054688779645718, + "sentence_nr": 13 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1345714227066951, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21078968525268058, + "sentence_nr": 13 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1802615495980454, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19630112442374525, + "sentence_nr": 13 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8212614342207556, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7876222308170935, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5888582552569348, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7876222308170935, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5888582552569348, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7876222308170935, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6871546336787117, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6871546336787117, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.43550490048931545, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6419345531187637, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17539593635425982, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3139104155809725, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.39225487001250453, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5189967318357492, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12859070457371286, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22162336097079333, + "sentence_nr": 13 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6871546336787117, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6258765997974801, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6680248455809015, + "sentence_nr": 13 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6258765997974801, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6722124517361844, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.17023327167529265, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.2521455524828544, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.13150403915662862, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2229548791980166, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.15247670030930355, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.1324448705928064, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.23382021475411732, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.09766807787022613, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.16788063248730647, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.12111615182138995, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.21505717177216926, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.09979796185764318, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.1310501345458609, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11512937599552589, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.1852451960926282, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.126642985054506, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.20913543330915318, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.12632059501697884, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.22490978846607526, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.1352612651586241, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.22176710342008016, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.18982400330057914, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.11760179026027952, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.19531596229980544, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.09968269909242322, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.14510210137368384, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.1204925245474865, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.12192273449574796, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.18177358407861108, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.16841504132177978, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.10667790151233097, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.17427579502643556, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.1508875367739971, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20889434105456664, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.13184959768302618, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.30505662513933907, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.09878901581794378, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.1651800705978423, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.13150403915662862, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.20736628090200235, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11824658049755846, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2047497542808756, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1461072488843534, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.1946917085815184, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1018151014848322, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.14524830913329922, + "sentence_nr": 13 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2589080403198245, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2189767496390278, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.09761931247072746, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1397102655312677, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1326689502117876, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.167569694983793, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.15848968577272604, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24447662789322752, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20665940380705064, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18243716955007858, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.16168125580314086, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2450013599045987, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20901732384345645, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20222677481313764, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.18492694642397273, + "sentence_nr": 14 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18243716955007863, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.16667457585564618, + "sentence_nr": 14 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.08556679632324991, + "sentence_nr": 14 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1575852366903021, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1474874322154398, + "sentence_nr": 14 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9202663016973823, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9263876898254182, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8621431910551439, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8363304387269249, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9419492177147062, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9202237383102091, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6656058483395763, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6306557167105028, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8657947138469048, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8367521498141209, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9419492177147062, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9202237383102091, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6993348038140574, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6335836519040372, + "sentence_nr": 14 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9419492177147062, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9202237383102091, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.11064738383914807, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.12449466772796605, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.12222372495044852, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.12383047729216191, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.1392580908972882, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.1333265070823728, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.07717159074475938, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.09413026539458375, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.16807498532991816, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.16404257857373192, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.21005284223037346, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.1679703861465872, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0951509584925814, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.12014553061064691, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.11737915185320068, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.10085050674562507, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11377195287577829, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.1301681094143453, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.09455636771034115, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.11463120929696417, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.1544787887603271, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.1384236976807813, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.11488572123868507, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1455973492295447, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.13735441291745387, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.20255423961944058, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.205408273869532, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.11470196605012067, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.0960438892364715, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.07184436307032757, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.20378989148152887, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.16337212771611656, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.09669863605676213, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.10886215421099144, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.18171364159867548, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.16245793974098002, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.07562263205281951, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.09819928715831736, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.1430606569063152, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.128073928655324, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.09526781380423786, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.18223449608285797, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.17127401148639734, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.09855718610544388, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.09669863605676213, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.11679541132562438, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.09643517424337235, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.1226126790254367, + "sentence_nr": 14 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3969253441303859, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.43277080710930865, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.26887073704667247, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2918476164856665, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5183146371291372, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5942793492554739, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.028864519535915668, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13535086012687783, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29687399422087424, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.002376388269368755, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.04574695485583133, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2703094106380642, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2982249908859, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.29313061087267483, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.30295384730328956, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.284911205299835, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.32067889250923776, + "sentence_nr": 15 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.29353055611145706, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3381266475327612, + "sentence_nr": 15 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.09910529437987022, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2511990291834263, + "sentence_nr": 15 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.295394335805579, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.362515947701148, + "sentence_nr": 15 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7243776840931383, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8980107630353439, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9325718821645923, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9490053815176721, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6653044831075519, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7986980418662383, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8504591592783618, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8980107630353439, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5950322600507224, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7090542316843602, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.44768974737795825, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.45520472994232203, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6509298345623671, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7962234681835563, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41813929088914065, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4779008399806691, + "sentence_nr": 15 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7243776840931383, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8642805496461259, + "sentence_nr": 15 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9506885335787997, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9606382935593174, + "sentence_nr": 15 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8665175293126633, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8642805496461259, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.342569723746894, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.47156710056973744, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.2319934375578505, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3367678538644817, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2261681529206079, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2647144854968396, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.35554722872430145, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.38873710544604445, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3720000272862786, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.44695658930348453, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4118588818865406, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.48573453292579605, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2998354233286452, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.41144215385645566, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.42142495511264777, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.49708063531780444, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.33296735510279596, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4176386300927819, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.32522259162581857, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3572499606049779, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.3449668516380805, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4341194278942322, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.36161896085795575, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5052818563161547, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.2798191316489921, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.31866179281073254, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3170440263520106, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.45327673850268096, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.15538140800156827, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.22365453282977818, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.1352815632479558, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2610624350708668, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.35907597395908514, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.433310273977633, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.33498522957587384, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4529680464694055, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.30675389390381064, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.49190118767827684, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.13922661372145656, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.27553494979330584, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3515170550015674, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.37881852198491145, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.30950829536527374, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3839157172568008, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.22141947821999777, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3633108862011865, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2957849631521743, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2872269269040579, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.19474118932727338, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3257294949902081, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.26505727008662233, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.41342120940573923, + "sentence_nr": 15 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5489548889989204, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5292552311493306, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.43141660874998483, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4251732952639193, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.479859141564773, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.47978767796651084, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2751349202729036, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.311148395820729, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5435154526669127, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5395341377171525, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5777979902630328, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6331337405946555, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6121338866063298, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6222767269627676, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5440627210252523, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5801365308278273, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5097049681318312, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5622473457673939, + "sentence_nr": 16 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.42567378467735034, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.470165978205223, + "sentence_nr": 16 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.47594607773277786, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5363851621507516, + "sentence_nr": 16 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4533373633026252, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5042718376547173, + "sentence_nr": 16 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8509306641805077, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9162670716850285, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9682566771439106, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9779127328168863, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7040822331405046, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7673268835807536, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8509306641805077, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9162670716850285, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7639225615341296, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8135226479972402, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6729400620282456, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7157738382386983, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6736973998414632, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7157738382386983, + "sentence_nr": 16 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7640211005075139, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8179683170395244, + "sentence_nr": 16 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8509306641805077, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9162670716850285, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.43141660874998483, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.45005622460103567, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.5269212212163125, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5528502361092263, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.6736973998414632, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7157738382386983, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.33491174038847354, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3646077683106875, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.450293182440332, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4822292034174927, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.19834633509680927, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.2712763621688402, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.546749262754264, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5830342194369027, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2754139367364165, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.34665831783057166, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.42877544777223947, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.43803970127356867, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.43908893511874636, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4785460996828672, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5898466143484524, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6611594562951559, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.44701416909786756, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5245065297475329, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.31417347869916407, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3530975487930333, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.6373258340947424, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6437421244363288, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4715455630189013, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.543275675805182, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2807304798995431, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3418543172008782, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.5397682182130759, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5703951757357331, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.5446420954986508, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5662782206307382, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3378721588486122, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4362453299175689, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.49288474585647657, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5578180330951528, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.36197274748300795, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.36134314178088084, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.17060055774694924, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.2566677182784047, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.5717883675148524, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.640780099960748, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.221071468018936, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.41620491059292214, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4263215396273059, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3711481893609263, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4101392170618868, + "sentence_nr": 16 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8813081534414112, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6486802664285581, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8066891982024211, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7344798528986015, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8855631322316195, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6486802664285581, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8585894188661937, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8813081534414112, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8434569599214109, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9123500588239437, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7849324644314795, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8934780380564308, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8799941663695641, + "sentence_nr": 17 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6809354000776107, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8640242853252401, + "sentence_nr": 17 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8841725044915145, + "sentence_nr": 17 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.39503194300684213, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6916289318228928, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3094285625931604, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6328843883953666, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.30888995556875376, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6801864286113619, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5512199399393973, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.45862256824436665, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7660160731572102, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47770079267358434, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8053780976175922, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6259358824502687, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8067950339997761, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5296344689827603, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7183083787484315, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7568440125092788, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8347576899702969, + "sentence_nr": 17 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3001800600660342, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6794930944968381, + "sentence_nr": 17 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.18879642915927602, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6584653291380502, + "sentence_nr": 17 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4697979053121435, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7554660353280213, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3164389365959547, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7121929522648841, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.6031798395521694, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7819677495994619, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5646631238098637, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.836206348617966, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.36615107686578496, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.696074520676609, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.1543252261021413, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4932064977882042, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.6966863379186454, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7941296295595748, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.5487584440377526, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8692797308530646, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.8787142254774354, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.944457825946867, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.5463887965663883, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7033378749149323, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.4912217876159168, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7991339910300419, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7251215108320924, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8334871013677937, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.587725019570444, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7957550794048827, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.28856268147560865, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6187787024786685, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4402122771181734, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7716344099519011, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.18465966669442654, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.503938463452404, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.17973438065210462, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5509051817440759, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.4809103179432793, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.7499547288317748, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.6244070585346295, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.8433626077474702, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.43660156107563336, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7165816705519701, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3748533897614559, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6863935447402433, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3607442374649342, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6876955247522804, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3718491333506089, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6941552634040441, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5110976370499285, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.842915559657988, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.5591535564944223, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.8079980831297509, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.11809858631445573, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5943886568930294, + "sentence_nr": 17 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1423170365140828, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.38605131339325, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3230989128220882, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13860487750886114, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36659667376085786, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36295227908523897, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13860487750886114, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36118801210741663, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.40877861250593944, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.16673024281943524, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3975048254243706, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.11262865194228103, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36030161445252334, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3066941236048102, + "sentence_nr": 18 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.409404483413751, + "sentence_nr": 18 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3629681915617596, + "sentence_nr": 18 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4136500403395244, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1909693288724605, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4115524982336727, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14192760409508295, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3989311390496819, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20304460086424203, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4966336271433132, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3935462418730863, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.33523829330170474, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3250861966671464, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3051626462022859, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30944349609311117, + "sentence_nr": 18 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11556522074454477, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.372688132616477, + "sentence_nr": 18 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.22392361812003433, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.460938469666163, + "sentence_nr": 18 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10704943109718215, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.362953271903766, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.14392660099814805, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.376362134090542, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.11718316363212337, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3844506520287143, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4024646900219184, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.08197539732074254, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.35287478964221025, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3502198678697797, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.376636825008991, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.30372034137078635, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.21481172921264619, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4009028477501074, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15065778147399764, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4580508275161034, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.21281360709834968, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4292702902558381, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.13780534982274106, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3273034480518148, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.36078900962911326, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2491467453273127, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.47986445165634506, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.10905122148101043, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4502571446121065, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.274959074733397, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3607206140473947, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.17796237395371306, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.48209511527864385, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.13644487773607678, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.36491236604183974, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.21850577875478958, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4494281444270959, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.31361999490423276, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1222354265296326, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3727252294250617, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.1109484758001971, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3612426584883393, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.20356858406857398, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.46358366365120834, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.11530762783711283, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3781690117672006, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.11907182322580316, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.49599003474365394, + "sentence_nr": 18 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4220964985804286, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4455062898838481, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.32026140564476524, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4016870075045671, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.34697616124581016, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.40373943351486685, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4184617303786878, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4321132548050678, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3499900041521066, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3822330369569219, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4220964985804286, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4455062898838481, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.41428013900466737, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.425713879206717, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4184617303786878, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4321132548050678, + "sentence_nr": 19 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5102296603076779, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5412065437629714, + "sentence_nr": 19 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.32282559495424096, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.38266426308756574, + "sentence_nr": 19 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4230074457298372, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4432451111759523, + "sentence_nr": 19 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6363676859401174, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6744544901797789, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9271746317040298, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9736668125871423, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6986939462620247, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7821077250864037, + "sentence_nr": 19 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9184678024441792, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8884834862973964, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3797391466432489, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3481158447116987, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.31102805827817165, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3375837027261476, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.19710660977672484, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2646181750020499, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.3797391466432489, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3274816319655301, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.28493958837889694, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.35876163607595707, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2485364833746714, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.2873862688213756, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.41664461891968263, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.42600414573009276, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2710684964643971, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.2982841390442802, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.23005567239800093, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.29184715566281483, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.2741455993358603, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.36403543443534025, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.34279101776553306, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.42600414573009276, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.30955822779938535, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.39546682876478195, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.39475108115635776, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.42154888635191134, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2781617026804374, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.32302333182207527, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.08473168573832755, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.25650903369815853, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2883871807684295, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.21660761852515356, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.25414220830184964, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.32910644083871465, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.29306886812256966, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.18084108219203518, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.27583433958197495, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.25612947694888455, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3002607987321696, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3216291288446239, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4272249853925079, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.188590266789637, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.26177705380820604, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3308736026652116, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3875427536757155, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.28432597056103653, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.35944124408933287, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.23631465024334478, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.2692006325646732, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.259615032947222, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2855780701161316, + "sentence_nr": 19 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.31343233007308363, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28662182336952924, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.289946670354745, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2585958231966256, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1574562620502688, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2833933092608246, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2330649391612961, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2858508520944113, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17248469309075373, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3673041887389201, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28838937143148047, + "sentence_nr": 20 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.25480888745972646, + "sentence_nr": 20 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14839290005301392, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29565285341782266, + "sentence_nr": 20 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22266775943086, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10508106635796587, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2504422832248121, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22563365567811913, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12913533075470382, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.24776496881674256, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.08680476715745516, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22066482174709295, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12117880855911824, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.32137825349405363, + "sentence_nr": 20 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.20104685618767446, + "sentence_nr": 20 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25137213099939626, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.28372673673489807, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.164799256779143, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.32187376249458133, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2969522070783606, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.14440617372843148, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.27200704330334224, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.2442053369522631, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.33050427873462274, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2133219421911448, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.3424665224706109, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8944054777319608, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.24197054442617688, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21682999057776514, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3722897460532404, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.24424323100599224, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2205591704292585, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3479467223515336, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.1926917267834754, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4545444680350158, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.17580772500133016, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.32957763052496886, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2148084015365523, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.40974307981059804, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.29622141199363383, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.24146688269469918, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.09958408398703665, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.22890983822248492, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.20795712301883962, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.282761705091657, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2551114536415265, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.18112053860965763, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3266298821510716, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1423412184218882, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26467729752192487, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.286072901441292, + "sentence_nr": 20 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2851456053265138, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.09858834583812252, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7445389400758123, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9134769668037408, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2506297252541463, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8320381765431424, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9129044064886581, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.23443139907396643, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.29972668857564216, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12409597120849801, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2928237514438983, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.15083364266523736, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.38662429787924074, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22849324967229787, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.44152236347960977, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2989569143807341, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4042166909648807, + "sentence_nr": 21 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3423939053207622, + "sentence_nr": 21 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.17611268473423294, + "sentence_nr": 21 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.20441543914149457, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.18928624746011372, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.43639616127375797, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.29213008358451265, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5828788445270403, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.14679869139754204, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4021419566569229, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.329340597116918, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6347143291802012, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.2868708266227936, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5779499593492363, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3436610762802303, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2782087319667435, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.632418768195088, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.3083012995502152, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6392851743718383, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.23050898626566632, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.48172150010681464, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.1969221590285716, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5644899370701738, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.32594818888335836, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.49646222671189383, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4604008032403599, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7444026788985108, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.29161716271402766, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.47302621872495865, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6854823532900025, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3546725638586892, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.21468316165048362, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6851126041819388, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.250737833894674, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.40017617077306594, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.27204995504877727, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2743963944428051, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.6341922683775969, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7252122374710612, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.12586347848916266, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3554854950683664, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3889045463729729, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.20229280648000492, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6194717199605934, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.31114459650134146, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.11856660123276004, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.34601719602607445, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.40072710492884706, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7206046648616748, + "sentence_nr": 21 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.22174147515312165, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2117279815687756, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.33999170096577974, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29221353951377876, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3058731661111107, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2747352174231836, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.42736771185803385, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.39727964545172, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.10975022749274138, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.13904829787402162, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2873518361947954, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.24505805183333226, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.33495074569972355, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3454509072842772, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.43090467385890824, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3813511699401743, + "sentence_nr": 22 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.22765977642995502, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2247283208344801, + "sentence_nr": 22 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.30931906627981315, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2527893205238235, + "sentence_nr": 22 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9210500207490827, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9069369532463243, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4607778969984477, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8103868370118212, + "sentence_nr": 22 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4885014761119101, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.827819363745503, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.17903870455040152, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.18440575845606422, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.1981763713215807, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.2520139548059959, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.17499310607879404, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.18175908515502465, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.10089587713517954, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.11552870044063634, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3168035112884022, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.30580678632835573, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.10825039887617824, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.1278708456868984, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.29705138694670025, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.2780223931578523, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.18986262747887736, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.18230825914917978, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.42442305789888696, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.42734795538422576, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.18781316135387768, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.16808430602651067, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.3454156644973841, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.30446460704247824, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4479597674250984, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.41132840401983517, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.10704445941620296, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.13527356658034445, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.09941527806251362, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.13609735884978696, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.19230259308735756, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.22211286692050705, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.20383889880388334, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.17813562619757226, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2986551380628858, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.30308773908860176, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.10536111661637193, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.13679626017050403, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.19732230687816163, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.22765162763479738, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3987203877706927, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.39992851145514274, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.09467800236923245, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.11434380596647938, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.15034676904545285, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.36138016740101575, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.31224382417562974, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.345966570287759, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2816115803298224, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3461146475963348, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.30131374176129855, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1552102601937674, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.1381803727119777, + "sentence_nr": 22 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4967067363118649, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6330776418175281, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.39501632817024007, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5629116515332234, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.44774758283371513, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6323151453499094, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3353166764160673, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5279751808070301, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3340392563357978, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5542299582982266, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2288355034549531, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.469883747317403, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5472915485853102, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7136367183558585, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6159995640523437, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8398584608765305, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5155625728615272, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6435263800797054, + "sentence_nr": 23 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.32206162101132135, + "sentence_nr": 23 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24125880497129865, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.47825499190432214, + "sentence_nr": 23 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3292010361291119, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5670300297444607, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.46086624699736534, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6510894943437193, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5511532346688224, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7550305399541021, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.34537865578685034, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5956718372193373, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.39080227521872696, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.621048393466749, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2755396296659942, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5033588333252278, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5795086255869999, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7183582779188291, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6214211316495574, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7844755306149331, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6008383045972477, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7291842011448325, + "sentence_nr": 23 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25418196696822093, + "sentence_nr": 23 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5117784549266909, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.274941620352113, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.4651004879148919, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.22743363869750483, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5634710936922129, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.518836150464752, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6242496691584447, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.5989032124636781, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7291306908177887, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2677353447271197, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4460422364967209, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3558785149067877, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.570837784052645, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2624310277292268, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4915471393606767, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.28489318277723963, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6000278331909762, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.5728668995816387, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7460634178179616, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.46086624699736534, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6510894943437193, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.46507550803536196, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6687857543858925, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5155625728615272, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6435263800797054, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3639412530979476, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.654342605671994, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.19882981891203355, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.45714526865696425, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.32269274420690436, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.49704406859630557, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.42849655626964983, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.662646931303495, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3937441173550755, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5600824723479425, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.23114663823833642, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5107406700140826, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.419793811546288, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6152785242440109, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.46086624699736534, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6510894943437193, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.46507550803536196, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6687857543858925, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2296660762967038, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5259172094145851, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.39501632817024007, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5505822266189535, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3215000448278979, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5947774549102596, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.20870371467330825, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.40726160697608454, + "sentence_nr": 23 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3460579711860666, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.43910565102067395, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.16692770661327389, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2940239540182693, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17589867762235817, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2991014535844428, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15568794672327907, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.30284457998681635, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17340302865304977, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28581037214602456, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1782509297990519, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28710039249342334, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4901491669500622, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5638035394617603, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3460579711860666, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4260473803699743, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2011131382865372, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36314253622836745, + "sentence_nr": 24 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17598839092477797, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28650792027744043, + "sentence_nr": 24 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.15997462319973554, + "sentence_nr": 24 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24731742205813823, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3980108204104697, + "sentence_nr": 24 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5611872124508993, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7431443902355421, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4465866985385432, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6260699913485588, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4465866985385432, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6260699913485588, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25509991414681377, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.505614827211273, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21452424426866915, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.44780791445343104, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23857086413632697, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.47971483823439903, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.446411600799131, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5816697577563045, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4664526119731094, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6399376431552989, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20156032858716424, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4855075115512445, + "sentence_nr": 24 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1526900266679129, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41716995830580594, + "sentence_nr": 24 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.23259933287371404, + "sentence_nr": 24 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20835831728362864, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.49812931259693377, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.17334119484500185, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.31463785312250736, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.12522096513057643, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.335302418196347, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.9100527513271326, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.9584484214161733, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.20156032858716424, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.35007862377558696, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3449632275226908, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5000457205552167, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.15568794672327907, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.30284457998681635, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1529699053146309, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.35702516223197556, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.15975615838102766, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.16928451900289662, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.40173762794247314, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15568794672327907, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.30284457998681635, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.16038844415635037, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.30359085570641314, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.3595283251171754, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5790446318474887, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.20563705341552085, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3762774944524412, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.16692770661327389, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.2940239540182693, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.14165832410287266, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.24107149684266257, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.1258646065963102, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.24857006332411635, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2519649154562495, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.44974180175388206, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3253958243003269, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.45173371737296786, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.27618177741751665, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4305107132988055, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.1683625745315614, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.31167225759119427, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.13728361101885644, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3436250633828196, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.16353712933127018, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.32934735468962634, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.48680589893384085, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6190257724123215, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.1551293035275564, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2674082220133274, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26091874007348304, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.17598839092477797, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.28650792027744043, + "sentence_nr": 24 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1332399603607437, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19971937750838645, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1834283688193615, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.22588088032876846, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12425342874478343, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1660533764831914, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.15538689193055893, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14158209035366248, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1869416235999822, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0578819658044546, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.16170596160446446, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2206817446345091, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14914968848461002, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21702090583674813, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.138685682297543, + "sentence_nr": 25 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1258687317121735, + "sentence_nr": 25 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1327332961698289, + "sentence_nr": 25 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23556366957615363, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22894370639738668, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16684195647378827, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21420692177337528, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.33150414660895594, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30808679013173407, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23556366957615363, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25521078373566897, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14257880024595157, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1979524022915653, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14257880024595157, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1979524022915653, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.260711748598298, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28143225165615565, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25621420675166556, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.32613185963061736, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21310996044302127, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2620829676028965, + "sentence_nr": 25 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.08892786873926031, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.14069122234920528, + "sentence_nr": 25 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12273033502938982, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.15070376710164984, + "sentence_nr": 25 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17376029392152273, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22421987263715565, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.07369293827420972, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.08728042965046878, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.12416744870990627, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.06452498627127952, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.09758509152849626, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.09985298970743903, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.22158794642706012, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.20787168962643957, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.05401240601013853, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.07243671671799473, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.1543646468773244, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.09348998462584433, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.130990604448226, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.09885362316286796, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.15900429623613993, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.10903227170832805, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.11481934989482791, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.1745453831609756, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.046916282267844764, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1250076305588977, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.12985392271660248, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.06737080019124615, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.18629057860741663, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.1504281768235603, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.11099491388125307, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.1201070010200949, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.08702826664587757, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.42262353460370816, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3966051357904673, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.09612004569821603, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.10249207815381514, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.1341907303110576, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11635402454082566, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.1636348970852316, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.06028131279303415, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.0901676620993871, + "sentence_nr": 25 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6983671476675032, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6697193437120026, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5809024483660724, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5409616569206442, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5893051076561628, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.555242666304663, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5197038614969076, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4944106522194635, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5863087308455573, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5756247354842696, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.44763438063632005, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4327706284829231, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4562933372999328, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4354000091116894, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.650945489442927, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6025447507087655, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5040260890269513, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.48159079549233025, + "sentence_nr": 26 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3966338449810425, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3940867714969907, + "sentence_nr": 26 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3186669369694382, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.34867169182256896, + "sentence_nr": 26 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6350785093832516, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6188888500556722, + "sentence_nr": 26 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7997394936755756, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7811228513409922, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9660854289024723, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9613867167137871, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7158159753911548, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7127947486849641, + "sentence_nr": 26 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6813410498464633, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6671821168913319, + "sentence_nr": 26 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.45066539224706753, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.4254592023616511, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.45779216736532874, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40945502186629257, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.40071581088356767, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.36844216279073794, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.14609848125563302, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.18504017619904287, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4184317523303411, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.40500270963162277, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4125433652059801, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3955923992862865, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.47182538941865537, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.42450279333172475, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.46492333059956836, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4401112788616263, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3967795858478363, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3803134453035716, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.34915707707242977, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.34988691421168616, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2613611691981996, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2740054517113319, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5600863252474344, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5179797138258272, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3461243385522883, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3560268535895035, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.43650008892828823, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.42551924250056755, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.504580863725975, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.46703102558879955, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.36954961729302616, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.34760122558190465, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3803026331533805, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.362200056491149, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.24777987943516128, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.2952194113831596, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.5258092834799059, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4981801549352249, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.48625052891235754, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4290939038872796, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.4045007320789693, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4098113348256027, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.44158642009003995, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.40903259597127894, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4946406341236379, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4719975064311173, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.41182432358851845, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4034715718148006, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3693186725771347, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.36304188784855995, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3692675983091899, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.37402683054534963, + "sentence_nr": 26 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7645786047678913, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8655501219338723, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8643729226327672, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9215030582508996, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8643729226327672, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9215030582508996, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7858164289172753, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8872272977237059, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8643729226327672, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9215030582508996, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6898913050782208, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8620687741940413, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6898913050782208, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8528837782425732, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7708719635370461, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8793197587693242, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7708719635370461, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.888538633093067, + "sentence_nr": 27 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6840689169974626, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8314419144081646, + "sentence_nr": 27 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5819799380263497, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7407958979814505, + "sentence_nr": 27 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7645786047678913, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8655501219338723, + "sentence_nr": 27 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41098733201100757, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.651283133493195, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6152755816095169, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7669297251133314, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4250002996145258, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6670552714553488, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3735617779670567, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5773479111816255, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5543498698280007, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7266847297604082, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3716332023564544, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6132388888021502, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6986939462620247, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8497711598086016, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5072570733389083, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7124868368374351, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5907596734005102, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7837270250239556, + "sentence_nr": 27 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10008881112800158, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.29125356488795046, + "sentence_nr": 27 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.041649157343430596, + "sentence_nr": 27 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6587480145435196, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7917841426705801, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.7446828000198126, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.885521980076414, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.6466833757622275, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7737914417145209, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.4447278656331358, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6742569711624775, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.600047216971444, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7511423755179258, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3382340617900419, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6182585373365673, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.6069548573053054, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7630436854704967, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.40482952759410495, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6241130944295542, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.5021718181363274, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.697189669759932, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.7858164289172753, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.8717639062922423, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5731680012014568, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.746935173521359, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7224037170215811, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8452672523905139, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5724496367057007, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7350859720106757, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.42250552136302394, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6425389837629188, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.7645048342610411, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.876234192352485, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.45751787171307623, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6647794363792763, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.7623067286250759, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.8682092620191191, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.46189821859121283, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6442319235751083, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3931991982536581, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6422735790483707, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44644290381704027, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6892051604181435, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.4000177797533498, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.645169701736652, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.4479818542603719, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6761961025641056, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.7123871749204508, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.8331784519293958, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5749089871602278, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.7211428196508521, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.38506289173931413, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.6152360906748179, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.6231488481063673, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7734960210241439, + "sentence_nr": 27 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.693261298341864, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.693261298341864, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6745016003476486, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8511670783317596, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.32329508170352383, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6141330847741713, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3837983925863447, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6366757448341102, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6745016003476486, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8511670783317596, + "sentence_nr": 28 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 28 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6667025833042813, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.746973053424487, + "sentence_nr": 28 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6745016003476486, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8511670783317596, + "sentence_nr": 28 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.217554942150074, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4859163400220353, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3008656294855478, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5198655773563042, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3008656294855478, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5198655773563042, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4965705242699611, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32079058840140134, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5094305382960898, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23693055763743093, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4968400811224627, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32079058840140134, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5094305382960898, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.35479105265934485, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4725761870926308, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3301899334885226, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5632801217523468, + "sentence_nr": 28 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 28 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1923904871441659, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5825915593253297, + "sentence_nr": 28 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32079058840140134, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5094305382960898, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.4892199210635081, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6263002679299042, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.09147827112247602, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3360691966057836, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2966218714191134, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5348497180679597, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.3008656294855478, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5209701084013916, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3254074668234594, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.540582703782851, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.22935466869603194, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6357138961264384, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.38769943713308697, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6179897670313796, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.35964066074252593, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5418421848087059, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.31666472263798334, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5096984883597744, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2656621439255861, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.47187800221660153, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.41583634222861793, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6558319092753532, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.26633048164380024, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5038200170930055, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5371525807924681, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7677378485184402, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.15274299622833287, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4692950277268683, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.30626101600123445, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.583891679561264, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.18137691349228668, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4586072719105437, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.23443677523946913, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5163278972706644, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.33876931708826047, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.550413577565279, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.14207405313947058, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.47874702297210975, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.2539342198718324, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.46375067718601715, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.200726550812963, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.41645295439394076, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.42995245074388394, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6515566568079457, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2834052290575623, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4974109921343301, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.19454290935168927, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.49909763892228687, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3837983925863447, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6379993550810827, + "sentence_nr": 28 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1481394578697113, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.30063818852404856, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14216645907653844, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2737034564138708, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14939354788683526, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29041654772860626, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5420662441541858, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5445089463670787, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.40919282596076484, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5542936932152527, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5928902071159559, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.647817438132439, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5928902071159559, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.647817438132439, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.34641959937802264, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.47549559716182727, + "sentence_nr": 29 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.42461633178803443, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5603699277937889, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2340216139262901, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4522093023662336, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4132352454218328, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5544725906870476, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3951500216160541, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6089660957340174, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.42282359171428024, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5395092365663595, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.35412968165085734, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4985795126785612, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.1598921499894403, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.390187618292215, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2400540439585043, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.49297433772099697, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4806604068305994, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.664228268001068, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2340216139262901, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.45184273575809186, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.16533113836624475, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4074791764578974, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.28547397706062927, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4838477808123968, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.6053011982655683, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.652613765735072, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4229247984636106, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.556465536088555, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3471790743028735, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4458106286047354, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3555508425572384, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5387745992013905, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.1709686260975486, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3940091304204109, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.25958657290343434, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.43162699627918094, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.2213908395073965, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4213527844474163, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.39696685122270786, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5497060467823045, + "sentence_nr": 29 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9411583614202783, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9389202454786235, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8775848642818888, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8618703443763697, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7861888156926622, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7987489460131649, + "sentence_nr": 0 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9319748402595084, + "sentence_nr": 0 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7613425680699503, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9020031517329425, + "sentence_nr": 0 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 0 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.883570112979728, + "sentence_nr": 0 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8263460336753243, + "sentence_nr": 0 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8060322164809728, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8980680846396624, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9491059403137463, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9664300701360793, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9457224261353452, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9708225134054753, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9419324607589119, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9619002332717353, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9189927159116271, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.895905738615658, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8719916488298841, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9397108105925289, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.884345665982421, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9584454525436005, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9643081480127652, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9067144042813564, + "sentence_nr": 1 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8781616442886918, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9745733081082687, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9237743711831492, + "sentence_nr": 1 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9659571253320222, + "sentence_nr": 1 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9044755244774213, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9016506657203592, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9259203238585231, + "sentence_nr": 1 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9226314544302758, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6237003645369218, + "sentence_nr": 1 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.919365977563579, + "sentence_nr": 1 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9113270242697518, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.898943894327586, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9736119227904283, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9415432301630186, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.973004167300919, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9617726716367615, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8788632576179716, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9442690941930104, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9167527970009353, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9264966822048945, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9760432643638268, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9290639912797567, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9451284616565533, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9571970948049097, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9428452278208271, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.924510998540744, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9354255661287414, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9038448099971822, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9290214610132344, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359307328554756, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9462257677914746, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9685511109758306, + "sentence_nr": 1 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9466350739636148, + "sentence_nr": 1 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7785501063601203, + "sentence_nr": 2 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8677672451180615, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9519685270619841, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5524309559543085, + "sentence_nr": 2 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8979970994003059, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8979970994003059, + "sentence_nr": 2 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9598023304313453, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8678877090803476, + "sentence_nr": 2 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.3628854370408249, + "sentence_nr": 2 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8866932684030095, + "sentence_nr": 2 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7932574787392968, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8840632918991035, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9244224424282228, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7493760739956499, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9434070582654602, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8626111481890223, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9742381587466754, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9614829239512629, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9634058264556766, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.846746937646691, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9416090102549223, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9586487245465463, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8628736669093499, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8883148663773122, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.921000444185013, + "sentence_nr": 2 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.964284245003951, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899852954654377, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5884852453065169, + "sentence_nr": 3 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8943359440390058, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6239646156236577, + "sentence_nr": 3 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8782485779028959, + "sentence_nr": 3 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9219735185328113, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8379214027434272, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9171135147465285, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8793006100154936, + "sentence_nr": 3 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6764135013792538, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8320911917964368, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8320911917964368, + "sentence_nr": 3 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9020259333664543, + "sentence_nr": 3 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8443316591536836, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9062739514559724, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9243814194896306, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9257122714800141, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9048929676970495, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9233238051356927, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8961117810241208, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9137011072166213, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9144918070375806, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9447475462972004, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9418568225974095, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8631885674989124, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9540570534869818, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9356691952085903, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8263666332486633, + "sentence_nr": 3 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9187937618702817, + "sentence_nr": 3 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6492261286778312, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4782990117524071, + "sentence_nr": 4 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8728890059382535, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7924841060781368, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8728890059382535, + "sentence_nr": 4 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8085699807438939, + "sentence_nr": 4 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9309167160514913, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8335210974928002, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9407617520385465, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9009704508776215, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.886161550229872, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8864780713525466, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8619950335517561, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.877644990158928, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9473578431592224, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8989284887461744, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8982857165205713, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9421743042333945, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.909430339396572, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9114715597392106, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221676855227006, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.903310364652346, + "sentence_nr": 5 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.43631872104818037, + "sentence_nr": 5 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.725100223395414, + "sentence_nr": 5 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8342041754812477, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7541096773855238, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9352893606252747, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7137044016250488, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8459329201101423, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9155785169978052, + "sentence_nr": 5 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.454243405917021, + "sentence_nr": 5 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4367071875067552, + "sentence_nr": 5 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9053865214400596, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9344907300105301, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.941467473244312, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8759462570863868, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9116059567890715, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95453015576562, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9271804273091313, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9494380676747487, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8907525765155897, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9420326057327402, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8729192735278123, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.840210783941434, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8830406923187026, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8705872791986208, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9129896861855028, + "sentence_nr": 5 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9775140091004713, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.931908394385036, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.958499216692883, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9787648208394673, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8848447424869419, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9476480635849643, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8420296194650692, + "sentence_nr": 6 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9645398026978572, + "sentence_nr": 6 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.976975965491712, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9575751193892209, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.922108923148009, + "sentence_nr": 6 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9367021384173281, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 6 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9136709169732016, + "sentence_nr": 6 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9031487241080103, + "sentence_nr": 6 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.922108923148009, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9717329164232313, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9810420842974353, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9296061535584738, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9548717794727779, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9723617284409432, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9433216405879152, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9253992588631311, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7833761650543694, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8958698547783525, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9659983030155975, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9368374793769542, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9292848975349729, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9705333075369675, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9560908971572966, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9288860917142431, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9402643484548583, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9303023646781129, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9076656012518489, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272618174968876, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630829363546703, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9437691960187881, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9233897890679653, + "sentence_nr": 6 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9217593594034571, + "sentence_nr": 6 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9429459010031568, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9680340601535599, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9444947592571505, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9524237679532525, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8521740000505951, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9147273981117778, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9353915284262971, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9575256886848735, + "sentence_nr": 7 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9544425909905248, + "sentence_nr": 7 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.887089742205764, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8937272463225717, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221577416896909, + "sentence_nr": 7 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7360571605491374, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9158962896380519, + "sentence_nr": 7 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9016185053131788, + "sentence_nr": 7 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9519313199322048, + "sentence_nr": 7 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9002497361613263, + "sentence_nr": 7 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9404564646985731, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9525612663771642, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9416090102549223, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.946182450185975, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8827665860178672, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9242269657430007, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9209375409360453, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9453162319718537, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9354735336178899, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9650606723493668, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.937172702008466, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9037456319061896, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9527540439558733, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9777992945719618, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9483614149601093, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630476322301069, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9090634311284931, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9592439701684463, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9352813563171796, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9578898822826803, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9349087092124988, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9553475775967099, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9426144990998162, + "sentence_nr": 7 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9455357310467346, + "sentence_nr": 7 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359599516797827, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8529883661830301, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9313047211019367, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9311406569876187, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9392038901097501, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9504743930445531, + "sentence_nr": 8 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9283998656503502, + "sentence_nr": 8 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9934034758807603, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9131528589305679, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9917679206284817, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9566767123929576, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359924521743563, + "sentence_nr": 8 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8893588081911743, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9583698738001583, + "sentence_nr": 8 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9934034758807603, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.946392812169666, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.911875333930421, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9169315433407361, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9541325707307038, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9648123726963476, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8370298547932784, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9334875203861144, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9413496332501932, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9956823103485622, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9457390517164731, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9671298665063969, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9336521523423332, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9502062892893858, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9333019767772176, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9037394051488277, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9283644587512466, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9237582925385585, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8995566191566017, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.896344147038989, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.09821094254330615, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9548273305811203, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9251737690567995, + "sentence_nr": 8 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275689564213165, + "sentence_nr": 8 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272442008199501, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9520060001290835, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9058859200742604, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8789724147701462, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9137645544850267, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8969027357279203, + "sentence_nr": 9 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9520060001290835, + "sentence_nr": 9 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275374047069039, + "sentence_nr": 9 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8772309014828462, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9140052999897977, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.868350408637765, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7563541659131354, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8578315979157695, + "sentence_nr": 9 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8441075622700097, + "sentence_nr": 9 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.23829288001976573, + "sentence_nr": 9 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9407267756704489, + "sentence_nr": 9 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.831845583109951, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9530684796567226, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8984174935165463, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.946008414943598, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9285885624039975, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9645189965938258, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9601667560566091, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9113133701465544, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9363094557613988, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9499594621802195, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8850558582872771, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9413520522974334, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8953760832780698, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9516191368774216, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.873135905690596, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9331628274049639, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9350921637704382, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9398175409358328, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9675093986501344, + "sentence_nr": 9 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9282207391671503, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.957452925924953, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8937237551170429, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256331955884847, + "sentence_nr": 10 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.904390835311888, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8995954000535624, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.928962868887516, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9339798045072082, + "sentence_nr": 10 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8008809042180175, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240001424211951, + "sentence_nr": 10 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.3493344613894351, + "sentence_nr": 10 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.92829327413418, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359271530286619, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9641555435524619, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.90719289051837, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8543701176038877, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9045960456690756, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9576659929734302, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9445842802137389, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917893569547509, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9031282594956593, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9325823323160847, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9171277146973622, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9631220314707449, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9125575210703364, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9671298665063969, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8583796678495444, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9075511178990168, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8942877287874674, + "sentence_nr": 10 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8881782096383685, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8452994228892592, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.857664755026069, + "sentence_nr": 11 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7687402404428638, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9425182378610694, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8221659843346086, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8685375697135141, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7860944644568774, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7829829019188287, + "sentence_nr": 11 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9425182378610694, + "sentence_nr": 11 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.289269703803095, + "sentence_nr": 11 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7829829019188287, + "sentence_nr": 11 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9425182378610694, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.915813486906383, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.928671169616198, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9195852720074569, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9260563505342738, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8580715674095071, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8991782906832555, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9549429726485847, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8571447284090962, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.953599772014362, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9382091007325469, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9125682774652475, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9084959093441131, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9389584881035126, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8968120926569282, + "sentence_nr": 11 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8757339860702672, + "sentence_nr": 11 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9742989957563788, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9854564066904739, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.938338375356983, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9363458435045497, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275189832478317, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9680610688075657, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9458276502828801, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9555270393882619, + "sentence_nr": 12 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.774972667720128, + "sentence_nr": 12 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9238483556315539, + "sentence_nr": 12 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9292605756517186, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8710905917506855, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8877998658561537, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9408832971568818, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8758560882945217, + "sentence_nr": 12 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9047504210526172, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9149458726191051, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9497380252636716, + "sentence_nr": 12 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9247145535687903, + "sentence_nr": 12 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 12 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8829314518141973, + "sentence_nr": 12 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9497380252636716, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9369900232316837, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9584772514045287, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9656526051593539, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9262800142753679, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9178799098053634, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8988056403515298, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240902217687106, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9454713149117651, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9457650793019858, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9115531547253959, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9403725471773088, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9107758326980321, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9251111872988325, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9267004903727016, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9652440580136615, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.924254800539438, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9054967244578502, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.886673201587762, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9082204179924286, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665046359304257, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9492870842156111, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9495327576081029, + "sentence_nr": 12 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9605742681789634, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9410712595774171, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.971921146040729, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8360964435901039, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9278436686065653, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9540941235545723, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7765803419515074, + "sentence_nr": 13 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9368660209060221, + "sentence_nr": 13 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9179315685239186, + "sentence_nr": 13 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9198867501155861, + "sentence_nr": 13 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9357668560693397, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.880651835588671, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9322025130978147, + "sentence_nr": 13 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8666701669384438, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9301584319196643, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9201441893603447, + "sentence_nr": 13 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4518476286184633, + "sentence_nr": 13 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8817151383770689, + "sentence_nr": 13 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9719892276800867, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9232252378020026, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.90340499273861, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9445601279006905, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9284637794790105, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9506720475284802, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9650672132857259, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.935825271074837, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9417006532894496, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9180957642017807, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9336273124319283, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9199623581249377, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9420383150390214, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9169222881606529, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9358954768171188, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9210475526688618, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.900422383617428, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665042848270522, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9195975724156285, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9482591669689567, + "sentence_nr": 13 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.894400898846725, + "sentence_nr": 13 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9683895601588671, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.974733551222386, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.935724475087967, + "sentence_nr": 14 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.945278116491169, + "sentence_nr": 14 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.913976993531483, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9206503738833902, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8879551150411227, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9093507960484853, + "sentence_nr": 14 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.44325871778061554, + "sentence_nr": 14 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8629899790604912, + "sentence_nr": 14 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8839868610728687, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9659019608247615, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9612040783142544, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9355702448711621, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8575724679460186, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.919154316989783, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9107041155041439, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8860042875765471, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9163443895096822, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9513360683724416, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9506442510575418, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9210869399305139, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8602965545640948, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8912610518101419, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.857937519719319, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9528771181894694, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9241995664234885, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432104991415542, + "sentence_nr": 14 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8927784164557715, + "sentence_nr": 14 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8681309346882299, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9045257596276787, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7552111299277484, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.82396628763246, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8809116426093319, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9069369532463243, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8255413975339149, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9116712045344968, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8855094439275503, + "sentence_nr": 15 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8809116426093319, + "sentence_nr": 15 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5085021700346579, + "sentence_nr": 15 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8155954216287978, + "sentence_nr": 15 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8849766832597384, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9321985099431636, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9158869153954171, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8940299169999223, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9029209331114941, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9434784706316768, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9504499063681887, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8103402263404181, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9033542015144801, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8920851535963175, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9012698346023688, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8815241253287673, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.955434974676454, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9190034267575142, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9028341607528202, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7933760889502307, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9669111778196173, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9183552099282611, + "sentence_nr": 15 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9213964969470535, + "sentence_nr": 15 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9438561056375272, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9245427558640842, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9466217999433078, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8600910973378976, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5465479162881712, + "sentence_nr": 16 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.908088143295894, + "sentence_nr": 16 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8689979953554426, + "sentence_nr": 16 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8837997874830685, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9430526976186369, + "sentence_nr": 16 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7213258253735133, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8583796678495444, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5352913894873965, + "sentence_nr": 16 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7506613813658406, + "sentence_nr": 16 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9120029292560927, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.969258616291086, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359933426460225, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8446197069920836, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665537794677691, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7519024768911576, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9419599049218603, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9100379761498075, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9243062555931161, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9232535952320629, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9430158926147498, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8923268998495886, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9316958873367511, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9441083273271286, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899852954654377, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9451690574618664, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9470556595464068, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8625414653847894, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8658510104009289, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.938651167013012, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9362303281043904, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9288883358178652, + "sentence_nr": 16 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7378741057437793, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.898904151376881, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8446522700991944, + "sentence_nr": 17 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9053865214400596, + "sentence_nr": 17 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8700885813654318, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9331139325257429, + "sentence_nr": 17 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8860497305091617, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8222704990602537, + "sentence_nr": 17 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8402559609277754, + "sentence_nr": 17 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7386088026745246, + "sentence_nr": 17 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.06557474419143802, + "sentence_nr": 17 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8700885813654318, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8771568927591851, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8869070241487921, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8173012945645394, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8220012279932035, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8449397341788647, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9239069749524619, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8972504357155736, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6602446784708298, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8667833154965509, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7306831212016971, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7306831212016971, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7406377967705062, + "sentence_nr": 17 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8509760908759664, + "sentence_nr": 17 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.96926930549605, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8641726957145408, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9637804258017773, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240863542577373, + "sentence_nr": 18 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9450374119495017, + "sentence_nr": 18 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.96926930549605, + "sentence_nr": 18 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9392663489644577, + "sentence_nr": 18 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8389799674466019, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9253208187778743, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221577416896909, + "sentence_nr": 18 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221577416896909, + "sentence_nr": 18 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9144266092886102, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9500117624130617, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.905862662289465, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9003734503251455, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.858544407149412, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9281598514152588, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.948121913854874, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9629589146416885, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9580736862318411, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9708835294542548, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9234823141384267, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9939521304203686, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9474838221026617, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9488355997601815, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424390135303181, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9711070259637357, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9237920416869381, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8942780008373756, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8468261925085733, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8976119317111001, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9527352893094178, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9510981354135275, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9521144628004171, + "sentence_nr": 18 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9774592733638915, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665042848270522, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9806060444395596, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9049668032095894, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665042848270522, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9667317239059525, + "sentence_nr": 19 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9159800198090925, + "sentence_nr": 19 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9667317239059525, + "sentence_nr": 19 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8925738398388144, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9058585844143391, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8888787903169728, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8925738398388144, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9128855680689195, + "sentence_nr": 19 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272821491047395, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9413354408985303, + "sentence_nr": 19 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.46619006556188114, + "sentence_nr": 19 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.913896513382741, + "sentence_nr": 19 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9058585844143391, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9557922260754473, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9391656780027514, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9260113686541587, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9419307613884336, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9845996986850503, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9255228522887315, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.889174440461237, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9496761617043387, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9322360743819351, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.935492418630274, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9456325305487512, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9325466173278317, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240800356922247, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9361690788124847, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.938043640398588, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.901373116210745, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9531605377803356, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9132591460407243, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9494481589794223, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9415361564397403, + "sentence_nr": 19 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.897450557161678, + "sentence_nr": 19 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.937002127196651, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9435408381256087, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9421449698305296, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9607456319189528, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5467617051776391, + "sentence_nr": 20 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8969209805167669, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9060555921929084, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8969209805167669, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9096430262961498, + "sentence_nr": 20 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7983940190154283, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9132591460407243, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9204057102575467, + "sentence_nr": 20 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4968312722246179, + "sentence_nr": 20 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8001971757912975, + "sentence_nr": 20 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9204057102575467, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95112146871187, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.949624286506194, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9588139991437585, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9236414681715879, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9453633691396565, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9278367059866518, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9302237306555959, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8441460025255829, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9470556595464068, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.951863030034636, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8944443568631728, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9082204179924286, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.90717359411325, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9014597856352894, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9250084453288043, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95462554022758, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9318340131711181, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9736147802901586, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9182449217144187, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9400180064454685, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9255769217104873, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9309426923102619, + "sentence_nr": 20 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9002012094811458, + "sentence_nr": 20 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9690017425712892, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6924365679057801, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.804543317337012, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8385395593542468, + "sentence_nr": 21 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9515560914045473, + "sentence_nr": 21 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.854435717190483, + "sentence_nr": 21 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7570244995532351, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6676892344393273, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.873135905690596, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6885773376269438, + "sentence_nr": 21 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.46961217063286037, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8379214027434272, + "sentence_nr": 21 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7570244995532351, + "sentence_nr": 21 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.20981645725460496, + "sentence_nr": 21 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6659995521111991, + "sentence_nr": 21 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7570244995532351, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8388678282825207, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9180596829241628, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9166274634412449, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8626786769008709, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7991709881281639, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8119656541607598, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8872308158649556, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8914910756561332, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.927494511055529, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9528614248210486, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8523282278495175, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9297633204435644, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9278042759794851, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8998995790099074, + "sentence_nr": 21 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9302677881301988, + "sentence_nr": 21 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9522511234396616, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7585159184184324, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8922770448230282, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9126128133576369, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6245412677586388, + "sentence_nr": 22 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.868233862673363, + "sentence_nr": 22 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8852329532489643, + "sentence_nr": 22 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8378994642516495, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8775848642818888, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9448292727000915, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8555426729178464, + "sentence_nr": 22 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7558344174949267, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8497451239178159, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 22 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8199763712080639, + "sentence_nr": 22 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8206722459046871, + "sentence_nr": 22 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.883570112979728, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.914786293186172, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8845568645036501, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8937192042814042, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.901348698020278, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8382013802825361, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9219786709510569, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8767649499531999, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9094880423990607, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8719390074611821, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9349020382990011, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272997117562144, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8962185446474815, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8930034245249151, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9271664513693498, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8936606750264663, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8803360259381345, + "sentence_nr": 22 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8680210960657176, + "sentence_nr": 22 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7931982206364059, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9438398456065387, + "sentence_nr": 23 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9281186022380125, + "sentence_nr": 23 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9184823166209557, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8839868610728687, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8884834862973964, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9300073119656489, + "sentence_nr": 23 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9381606131991436, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8839868610728687, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9300073119656489, + "sentence_nr": 23 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4072337657555589, + "sentence_nr": 23 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9184823166209557, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9293646790023864, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9511392272878579, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9293879632586071, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9277950353049101, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8843378183459343, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8741633139531418, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9271525909282003, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9736840552120738, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9396084767892234, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9187563342696414, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8896752045577786, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9737097349915758, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9330058893011377, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9570066548501687, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9201684039669155, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9133901345922595, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9458636432813123, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917857433142856, + "sentence_nr": 23 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9375412439691305, + "sentence_nr": 23 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9556267474396976, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9489054429933926, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9489054429933926, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8327628422929998, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9249365863966041, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.922528755167094, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9486938895906879, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 24 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8620685016584069, + "sentence_nr": 24 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9405916043682414, + "sentence_nr": 24 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9327915990783561, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.909738029095061, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.909738029095061, + "sentence_nr": 24 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8707492337114523, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95883735444933, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9455007606735264, + "sentence_nr": 24 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9372630850025364, + "sentence_nr": 24 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5983897920478856, + "sentence_nr": 24 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9299762198228243, + "sentence_nr": 24 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9302303599426779, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9544609413449265, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9355306533611718, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432457481338326, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9472285181144658, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.923828763793418, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9224761498105726, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9756278595118478, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9499594621802195, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9544238060448419, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9012364553153411, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8199585012210312, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9280048312907723, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9587462450914201, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8938919301593574, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9507758066685948, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432005035367906, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9675203656708941, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9303385434730891, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9401106918306472, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9533532275954528, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9274629860503822, + "sentence_nr": 24 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8926908826740254, + "sentence_nr": 24 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6224897798032885, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7893575827661004, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9478696521177714, + "sentence_nr": 25 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7160421907140165, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6217685026572488, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.794919886900137, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8793006100154936, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5916523997385489, + "sentence_nr": 25 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4849269488253923, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7462718113811923, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8083701726292805, + "sentence_nr": 25 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.722502153449955, + "sentence_nr": 25 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5544920599877754, + "sentence_nr": 25 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6853792233736985, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9200538056807258, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630774769374594, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9143443086107108, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9052744049140443, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9018850910676268, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9142574363760879, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9168431011517528, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9141901633008906, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9317477810881586, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9354759108346813, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9141453314674155, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9550191440621234, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8402328635525613, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.831845583109951, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9092382099397807, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9770044719642067, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9155318202784664, + "sentence_nr": 25 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8272309965382391, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7767725512278205, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9373981486656514, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9579023880929557, + "sentence_nr": 26 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9268329536813669, + "sentence_nr": 26 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.904428807825769, + "sentence_nr": 26 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9116613044583819, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9084279839455062, + "sentence_nr": 26 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8013174743750245, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.34811585804131506, + "sentence_nr": 26 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8912610518101419, + "sentence_nr": 26 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9084279608664247, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9342971539350323, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9618018909441389, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221850850049388, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9621502301102783, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9207497282487874, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8817316559043479, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9595521389704431, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9604273088099046, + "sentence_nr": 26 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8832167531630292, + "sentence_nr": 26 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9618018909441389, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9338423795983638, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8069582822584229, + "sentence_nr": 27 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432051372011929, + "sentence_nr": 27 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8875472267363329, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8538919155402751, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8601111478550084, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8274840531521687, + "sentence_nr": 27 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8737243337458652, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8875472267363329, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8213297311895551, + "sentence_nr": 27 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.3007622907436899, + "sentence_nr": 27 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.906379768806771, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8996352283472103, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8577239523880982, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9705288278234159, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9022302698191352, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9618116705103616, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9282902444420971, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9283062281157928, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9143841728614055, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9148205155364358, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9429357495928096, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8199038085123204, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9007500710615358, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9335504867261654, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8519148326217993, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9467340802817513, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8914166352994622, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8697448206881571, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9091527400737927, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9487286082082608, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9550331732946552, + "sentence_nr": 27 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9703747509928279, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9540941235545723, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9543144589160125, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.652649628941592, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9767775472269087, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9393628940364738, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9410712595774171, + "sentence_nr": 28 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6444379795256558, + "sentence_nr": 28 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8964898605551818, + "sentence_nr": 28 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9263597385884417, + "sentence_nr": 28 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899546929868499, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899546929868499, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.824741266541094, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 28 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8283905649271065, + "sentence_nr": 28 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 28 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.881413837458117, + "sentence_nr": 28 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899546929868499, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9437940294094723, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9269703177791706, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.944904344834561, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8479413107328494, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9715595760527852, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8595969327963556, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9538713542813556, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8348508116391393, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9572462820044535, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9375119517314923, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9493167367596885, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9344916654109876, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9849529115133767, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275259780895282, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9276874028790393, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9473074618830379, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9460494618521745, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8923268998495886, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9455007606735264, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9526558782357073, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9338345156544289, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8995764072227389, + "sentence_nr": 28 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9505226544098013, + "sentence_nr": 28 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630841609539229, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9451142647196181, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7510122845400926, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8090165300577936, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9543128468386116, + "sentence_nr": 29 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.920197561569537, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8759929746436435, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8935424392990651, + "sentence_nr": 29 + } + ], + [ + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7769676399488106, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8875472267363329, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8664932988313133, + "sentence_nr": 29 + } + ], + [ + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.906379768806771, + "sentence_nr": 29 + } + ], + [ + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8001297194719582, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9048724843551281, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8947987168857687, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9489238765618674, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.945278116491169, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8989194854163256, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9061728639858796, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9501419212325259, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.891206254843651, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9331628274049639, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9046319474149982, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9463095328863311, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9085828484030862, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8856061163721227, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9661878700572512, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + } + ], + [ + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.908669313428767, + "sentence_nr": 29 } ] -} \ No newline at end of file +] \ No newline at end of file