diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -8146,6 +8146,14 @@ "type": "Commercial", "license": null }, + { + "id": "meta-llama/llama-4-maverick", + "hf_id": null, + "creation_date": "NaT", + "size": null, + "type": "Commercial", + "license": null + }, { "id": "meta-llama/llama-3.3-70b-instruct", "hf_id": "meta-llama/Llama-3.3-70B-Instruct", @@ -8171,15 +8179,7 @@ "license": null }, { - "id": "mistralai/mistral-small-24b-instruct-2501", - "hf_id": "mistralai/Mistral-Small-24B-Instruct-2501", - "creation_date": "2025-01-28T13:30:13+00:00", - "size": 23572403200.0, - "type": "Open", - "license": "Apache 2.0" - }, - { - "id": "mistralai/mistral-nemo", + "id": "mistralai/mistral-small-3.1-24b-instruct", "hf_id": null, "creation_date": "NaT", "size": null, @@ -8194,14 +8194,6 @@ "type": "Commercial", "license": null }, - { - "id": "google/gemini-2.0-flash-lite-001", - "hf_id": null, - "creation_date": "NaT", - "size": null, - "type": "Commercial", - "license": null - }, { "id": "google/gemma-3-27b-it", "hf_id": "google/gemma-3-27b-it", @@ -8218,6 +8210,14 @@ "type": "Open", "license": "Apache 2.0" }, + { + "id": "deepseek/deepseek-chat-v3-0324", + "hf_id": null, + "creation_date": "NaT", + "size": null, + "type": "Commercial", + "license": null + }, { "id": "microsoft/phi-4-multimodal-instruct", "hf_id": "microsoft/Phi-4-multimodal-instruct", @@ -8253,83 +8253,83 @@ "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.4438455475739657, + "score": 0.5592169701176339, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.6320800718582147, + "score": 0.7187320759449207, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.5894973558751632, + "score": 0.4438455475739657, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.7562097956860054, + "score": 0.6320800718582147, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.3846086976522069, + "score": 0.5894973558751632, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5835344719191324, + "score": 0.7562097956860054, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.4804215535486392, + "score": 0.3846086976522069, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.6694735319785804, + "score": 0.5835344719191324, "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.2511517944602615, + "score": 0.3472596783998825, "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.4484633445384819, + "score": 0.5880210095195896, "sentence_nr": 0 }, { @@ -8349,51 +8349,51 @@ "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.5749603738163459, + "score": 0.5617561349997696, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.7240488251574404, + "score": 0.7132694856647042, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.5617561349997696, + "score": 0.2963216580569375, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.7132694856647042, + "score": 0.5101500486835966, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.2963216580569375, + "score": 0.6303545030576861, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.5101500486835966, + "score": 0.77785134764153, "sentence_nr": 0 }, { @@ -8445,83 +8445,83 @@ "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.32063971770635635, + "score": 0.3742128962272385, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5206258401513325, + "score": 0.5924994297544066, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.39086127104761287, + "score": 0.32063971770635635, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.6239956806265569, + "score": 0.5206258401513325, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.3020679767949182, + "score": 0.39086127104761287, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5246291817407542, + "score": 0.6239956806265569, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.29261990846502584, + "score": 0.3020679767949182, "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5207965578474395, + "score": 0.5246291817407542, "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.23343658187420896, + "score": 0.2516114673955893, "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.5188968707275573, + "score": 0.5164808837319497, "sentence_nr": 0 }, { @@ -8541,51 +8541,51 @@ "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.2596939072050362, + "score": 0.4273817965049865, "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.4394574387008692, + "score": 0.6016204186733703, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.4273817965049865, + "score": 0, "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", + "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.6016204186733703, + "score": 0.0, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0, + "score": 0.3972267643943283, "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.0, + "score": 0.5952617863931118, "sentence_nr": 0 }, { @@ -8621,15652 +8621,14836 @@ "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.7964573357809173, + "score": 0.9878765474230741, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.8458636471716781, + "score": 0.9958930217841712, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.34633672321253084, + "score": 0.8780634320789833, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.5378805625051344, + "score": 0.926946700115022, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.3582301850807646, + "score": 0.7964573357809173, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.5380305837807603, + "score": 0.8458636471716781, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.300740577257699, + "score": 1.0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.5272774705181614, + "score": 1.0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.3099603853356145, + "score": 0.9452996322890763, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.5209233176748354, + "score": 0.9463396364218181, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.35580399268816465, + "score": 0.9878765474230741, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.5392592206305507, + "score": 0.9958930217841712, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.39317381456022266, + "score": 0.9144679601133087, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.6026058740561834, + "score": 0.968636887477685, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.48930936408255293, + "score": 0.6537803976048806, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.699085629239476, + "score": 0.7742226743967544, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.3963410285961713, + "score": 0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.613166190285915, + "score": 0.0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.44294247711132617, + "score": 0.9878765474230741, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.5915660675216782, + "score": 0.9958930217841712, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.3756985486608933, + "score": 0.738238064391125, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.5991443770283833, + "score": 0.8637738769684485, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.5009456904181451, + "score": 1.0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.6893719644090858, + "score": 1.0, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.18273944860385094, + "score": 0.4226799078177409, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.44261865187418153, + "score": 0.5651672709988255, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.2153742037697241, + "score": 0.32406433662077544, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.4581737688885401, + "score": 0.5243586266504104, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.3372953649368346, + "score": 0.34633672321253084, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.5482505380106469, + "score": 0.5378805625051344, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.28528905353056333, + "score": 0.3852076286218103, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.4885812318466243, + "score": 0.5629822759777402, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.2935204022158406, + "score": 0.26021014514167856, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.4867597973247361, + "score": 0.4820043660869366, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.2929684584911775, + "score": 0.2502214193201532, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.5038324436049059, + "score": 0.49819657249183386, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.4034224234291925, + "score": 0.4224822177894696, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.5736798834726872, + "score": 0.5623149154312317, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.1077205146963877, + "score": 0.441464946158803, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.428338145564396, + "score": 0.6003092613714627, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.22327767951697297, + "score": 0.34734422615832194, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.4063556880747369, + "score": 0.5262645092345396, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.2572733200413211, + "score": 0.3232734746803988, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.4520014138562526, + "score": 0.5256417654956012, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.40311197004738203, + "score": 0.13576358182705253, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.5788525108956781, + "score": 0.41529193531769876, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.45313578977486535, + "score": 0.24926331918525627, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.6160993561903745, + "score": 0.4599756430080559, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.2651736858432996, + "score": 0.5077888484472814, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.4491383344282561, + "score": 0.6493197366069867, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.34545319957597864, + "score": 0.4318843329340524, "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.5727052860304503, + "score": 0.6011096108554106, "sentence_nr": 0 }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15815751066481462, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5152611872266766, - "sentence_nr": 1 - }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.12903696060775005, - "sentence_nr": 1 + "score": 0.3582301850807646, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.456225988032654, - "sentence_nr": 1 + "score": 0.5380305837807603, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.44175593938589236, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.024459391267874976, - "sentence_nr": 1 + "score": 0.6111096352841461, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.12351824822447692, - "sentence_nr": 1 + "score": 0.3905612192964119, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.46822754470803873, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.3465147345201782, - "sentence_nr": 1 + "score": 0.5861956606716949, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.08516700886866406, - "sentence_nr": 1 + "score": 0.388275825650142, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.4091252890943268, - "sentence_nr": 1 + "score": 0.5946895227088745, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.19194937906573872, - "sentence_nr": 1 + "score": 0.5447134963471945, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.5477665664300843, - "sentence_nr": 1 + "score": 0.7261993659965442, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.47840604738578085, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.4370196290761142, - "sentence_nr": 1 + "score": 0.6297473901472479, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.20669086265781264, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.5076721272198604, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.17630490037560695, - "sentence_nr": 1 + "score": 0.5009380663759289, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.48116430160978857, - "sentence_nr": 1 + "score": 0.6679481474132949, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2087397501881324, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.4122750002638689, - "sentence_nr": 1 + "score": 0.39410069470509135, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.15412719160788987, - "sentence_nr": 1 + "score": 0.4640742081615844, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.5010353699512481, - "sentence_nr": 1 + "score": 0.6372680189651158, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.12369892692249995, - "sentence_nr": 1 + "score": 0.3732667150787326, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.44549610902403686, - "sentence_nr": 1 + "score": 0.5674650482249737, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.12560672881768975, - "sentence_nr": 1 + "score": 0.4641883721676649, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.4969560260291519, - "sentence_nr": 1 + "score": 0.6403267149729506, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", "metric": "bleu", - "score": 0.17077058518804336, - "sentence_nr": 1 + "score": 0.300740577257699, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.5022008374701596, - "sentence_nr": 1 + "score": 0.5272774705181614, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.10784756064735967, - "sentence_nr": 1 + "score": 0.4061066499716187, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.4427230465401631, - "sentence_nr": 1 + "score": 0.6080346530552228, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.06735571462439276, - "sentence_nr": 1 + "score": 0.31520390441828733, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.38102852892512806, - "sentence_nr": 1 + "score": 0.5666753970394321, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.2426576141982896, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.42723260976616784, - "sentence_nr": 1 + "score": 0.4681164293806726, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.1694466724647263, - "sentence_nr": 1 + "score": 0.4740970660161798, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.4902502031746037, - "sentence_nr": 1 + "score": 0.6509090646705696, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.3725907668893922, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.3532931581623198, - "sentence_nr": 1 + "score": 0.5838909337906717, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.175396614619324, - "sentence_nr": 1 + "score": 0.1229583779881281, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.49736499605529066, - "sentence_nr": 1 + "score": 0.3267617054992069, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.15154395847232716, - "sentence_nr": 1 + "score": 0.46418585410212687, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.46053919348995803, - "sentence_nr": 1 + "score": 0.6257813924169782, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.21346660402255854, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.4041678259311437, - "sentence_nr": 1 + "score": 0.4749873824263006, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.1290514243115152, - "sentence_nr": 1 + "score": 0.3284656616594502, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.4766581477336301, - "sentence_nr": 1 + "score": 0.5314089060682492, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.08273178236238297, - "sentence_nr": 1 + "score": 0.3576035471132581, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.36399666460809255, - "sentence_nr": 1 + "score": 0.5426399702952437, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.12601482779921785, - "sentence_nr": 1 + "score": 0.4422044705926463, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.43595665254608706, - "sentence_nr": 1 + "score": 0.6089032707320831, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 + "score": 0.3099603853356145, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 + "score": 0.5209233176748354, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.08214106568089705, - "sentence_nr": 1 + "score": 0.30939216619448856, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.3969463877642616, - "sentence_nr": 1 + "score": 0.5208328629222005, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.0744904632040495, - "sentence_nr": 1 + "score": 0.2417404985264926, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.4111163205685468, - "sentence_nr": 1 + "score": 0.4540589962283635, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.12894104034845807, - "sentence_nr": 1 + "score": 0.2840596414449913, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.4486368934849452, - "sentence_nr": 1 + "score": 0.4892722276483434, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.10070927557742705, - "sentence_nr": 1 + "score": 0.29556470672244106, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.43718220262892105, - "sentence_nr": 1 + "score": 0.5006575554615639, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.0772718393063023, - "sentence_nr": 1 + "score": 0.344338817815182, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.4203683137304257, - "sentence_nr": 1 + "score": 0.5355920179313903, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.0756907193511249, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.4138725093679467, - "sentence_nr": 1 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.21748353646757182, - "sentence_nr": 1 + "score": 0.28716979381420105, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.4462746462826943, - "sentence_nr": 1 + "score": 0.5171262478660463, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.4179644538349004, - "sentence_nr": 1 + "score": 0.05448112815049329, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.10505106462290037, - "sentence_nr": 1 + "score": 0.3360010226928493, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.4474870048911137, - "sentence_nr": 1 + "score": 0.5216531073745614, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.33210944907163426, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.0009218289085545725, - "sentence_nr": 1 + "score": 0.5289420578289948, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.15653859793617866, - "sentence_nr": 1 + "score": 0.4331131003868224, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.43177798053127925, - "sentence_nr": 1 + "score": 0.5898969623074624, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0891537192318598, - "sentence_nr": 1 + "score": 0.35580399268816465, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.3970634926176537, - "sentence_nr": 1 + "score": 0.5392592206305507, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0950136506275681, - "sentence_nr": 1 + "score": 0.40724702386633355, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.4372017487229785, - "sentence_nr": 1 + "score": 0.5650294312072152, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.1259356760989446, - "sentence_nr": 1 + "score": 0.3741026207881868, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.44568274520971096, - "sentence_nr": 1 + "score": 0.5834523243646894, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.16322494183480127, - "sentence_nr": 1 + "score": 0.3909683536530208, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.4815584993817062, - "sentence_nr": 1 + "score": 0.5861999156017297, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0904087252785689, - "sentence_nr": 1 + "score": 0.42514139917377647, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.41830513174690515, - "sentence_nr": 1 + "score": 0.5934850967299605, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.21351902664706998, - "sentence_nr": 1 + "score": 0.4100880948326119, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.5130443042033361, - "sentence_nr": 1 + "score": 0.5748650910980349, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.16269986423611488, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.06939838145153245, - "sentence_nr": 1 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 + "score": 0.553414625382002, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.3371547585108182, - "sentence_nr": 1 + "score": 0.7074940030211, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.1691386174483793, - "sentence_nr": 1 + "score": 0.19250412598108757, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.4920789340026317, - "sentence_nr": 1 + "score": 0.4448372401459185, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.14944432524273302, - "sentence_nr": 1 + "score": 0.3109333640704356, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.4972796478830659, - "sentence_nr": 1 + "score": 0.5072760587388273, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.09793316925795417, - "sentence_nr": 1 + "score": 0.4475435253337274, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.4297577431879659, - "sentence_nr": 1 + "score": 0.5956867226653717, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.38870674200492367, - "sentence_nr": 2 + "score": 0.5274220384037692, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.6484380084879691, - "sentence_nr": 2 + "score": 0.6765588140322357, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.4923751299732868, - "sentence_nr": 2 + "score": 0.39317381456022266, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.6853756490381199, - "sentence_nr": 2 + "score": 0.6026058740561834, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.3996712647649035, - "sentence_nr": 2 + "score": 0.3229030611977504, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.6353525755760105, - "sentence_nr": 2 + "score": 0.5136703373168134, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.5115346945020283, - "sentence_nr": 2 + "score": 0.3113772787307771, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.7037574715738644, - "sentence_nr": 2 + "score": 0.5344680037267059, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.017834618169115152, - "sentence_nr": 2 + "score": 0.35103160282487145, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.05927156798818119, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.23904922011090457, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3399292774084129, - "sentence_nr": 2 + "score": 0.5432217848942439, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.6152980280400979, - "sentence_nr": 2 + "score": 0.451294475352144, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.8311281590297233, - "sentence_nr": 2 + "score": 0.6017904208103514, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.005449161724399305, - "sentence_nr": 2 + "score": 0.4831574055451935, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.026158029267484995, - "sentence_nr": 2 + "score": 0.6238976883927624, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.24508104771894088, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.5725552336126134, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.3667951090093586, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.574382729364071, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.20801258614305904, - "sentence_nr": 2 + "score": 0.0895824671662166, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.26703508536995574, - "sentence_nr": 2 + "score": 0.24679712992918926, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.35315040956049437, - "sentence_nr": 2 + "score": 0.3511508047578372, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.625895188503691, - "sentence_nr": 2 + "score": 0.502364219831564, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.11133996756497437, - "sentence_nr": 2 + "score": 0.4166560818400039, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.4410280353998367, - "sentence_nr": 2 + "score": 0.6515522498665886, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.17743299460161885, - "sentence_nr": 2 + "score": 0.42734667499155, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.43071271897416463, - "sentence_nr": 2 + "score": 0.6397906518456509, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.16052654068024738, - "sentence_nr": 2 + "score": 0.48930936408255293, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.41580120868053494, - "sentence_nr": 2 + "score": 0.699085629239476, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.05963579607071745, - "sentence_nr": 2 + "score": 0.49517040114696814, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.31139762378406344, - "sentence_nr": 2 + "score": 0.672650019344124, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.006734847287559362, - "sentence_nr": 2 + "score": 0.47426274497383164, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.03408121951468736, - "sentence_nr": 2 + "score": 0.6547171931962555, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.09880177230676102, - "sentence_nr": 2 + "score": 0.44697138732796604, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.3297638349619511, - "sentence_nr": 2 + "score": 0.6533822343227146, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.2377604053257556, - "sentence_nr": 2 + "score": 0.46782777727347913, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.5662768009060447, - "sentence_nr": 2 + "score": 0.6774531400702429, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.22573408807826306, - "sentence_nr": 2 + "score": 0.5803563388252858, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.5444672928195973, - "sentence_nr": 2 + "score": 0.7397216312303552, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.10742716472890976, - "sentence_nr": 2 + "score": 0.05034135169161612, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.42694859148910824, - "sentence_nr": 2 + "score": 0.25001156386121903, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 2 + "score": 0.45495679780282583, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.699735222419999, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 + "score": 0.0025767494884759577, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 + "score": 0.022849655955591117, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.14745870033404418, - "sentence_nr": 2 + "score": 0.49179307081132717, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.475170637938921, - "sentence_nr": 2 + "score": 0.6798382116037067, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.21665407194210906, - "sentence_nr": 2 + "score": 0.37224644590020084, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.4344921442639243, - "sentence_nr": 2 + "score": 0.5716341952568125, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.37994652561206577, - "sentence_nr": 2 + "score": 0.36921945860245514, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.6464467277069994, - "sentence_nr": 2 + "score": 0.5602656572610939, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.09362261118571368, - "sentence_nr": 2 + "score": 0.3963410285961713, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.3452056942265759, - "sentence_nr": 2 + "score": 0.613166190285915, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.18917620656425485, - "sentence_nr": 2 + "score": 0.390589858528132, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.4346170232980484, - "sentence_nr": 2 + "score": 0.5498108214680063, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.420450507904553, - "sentence_nr": 2 + "score": 0.33433503990805974, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.6503146347305717, - "sentence_nr": 2 + "score": 0.5465517653500693, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.24894072982768842, - "sentence_nr": 2 + "score": 0.2820342917142487, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.5212235893093335, - "sentence_nr": 2 + "score": 0.5410881356746259, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.393613605227227, - "sentence_nr": 2 + "score": 0.5030966277003764, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.6492198447661237, - "sentence_nr": 2 + "score": 0.680466091037649, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.21147734744561483, - "sentence_nr": 2 + "score": 0.4611551555069207, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.41020178654369294, - "sentence_nr": 2 + "score": 0.6294324146720465, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.2329856851831642, - "sentence_nr": 2 + "score": 0.19685577478840446, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.5405751250637106, - "sentence_nr": 2 + "score": 0.4551749985589161, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.41756686236967944, - "sentence_nr": 2 + "score": 0.41865363173987147, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.5616829345739638, - "sentence_nr": 2 + "score": 0.6246787832833863, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.38189567401226293, - "sentence_nr": 2 + "score": 0.18474062565700086, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.6154314825900052, - "sentence_nr": 2 + "score": 0.3944315616553734, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.2126707920684064, - "sentence_nr": 2 + "score": 0.4044350002821056, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.4659908460634765, - "sentence_nr": 2 + "score": 0.5937440273149751, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.23240102389974368, - "sentence_nr": 2 + "score": 0.3660623361610902, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.4973274282641141, - "sentence_nr": 2 + "score": 0.5840165124966731, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.17979384730979156, - "sentence_nr": 2 + "score": 0.4404175157492415, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.4177311931467539, - "sentence_nr": 2 + "score": 0.6499400950194552, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.1702602472176709, - "sentence_nr": 2 + "score": 0.44294247711132617, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.4366640707779677, - "sentence_nr": 2 + "score": 0.5915660675216782, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.933651069586263, - "sentence_nr": 2 + "score": 0.418987297037058, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.9586507529693243, - "sentence_nr": 2 + "score": 0.5748791698084322, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.3816408219023713, - "sentence_nr": 2 + "score": 0.2020983719878774, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.5784105768028126, - "sentence_nr": 2 + "score": 0.4110516731487298, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.18398226639192106, - "sentence_nr": 2 + "score": 0.3495696951007327, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.37285010531146734, - "sentence_nr": 2 + "score": 0.5497542561082874, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.26958884543190903, - "sentence_nr": 2 + "score": 0.40124039505328407, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.5631664732610485, - "sentence_nr": 2 + "score": 0.5502161218203272, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.4005296397635166, - "sentence_nr": 2 + "score": 0.4342485684315921, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.6201785376974677, - "sentence_nr": 2 + "score": 0.5862756549666985, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.15956483578595942, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.425693420655628, - "sentence_nr": 2 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.2323385180696658, - "sentence_nr": 2 + "score": 0.32846260295658253, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.5019509292309764, - "sentence_nr": 2 + "score": 0.5234687470369108, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.22952177306405494, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5279520952576137, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.3618488169166299, - "sentence_nr": 2 + "score": 0.0626814220834104, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.5708179622131996, - "sentence_nr": 2 + "score": 0.2649283376124583, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.1712766252338756, - "sentence_nr": 2 + "score": 0.3504606692020456, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.5225554962608486, - "sentence_nr": 2 + "score": 0.5696982139616064, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.2709079038456153, - "sentence_nr": 2 + "score": 0.500516497336299, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.447458019441992, - "sentence_nr": 2 + "score": 0.7019407549121803, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.38249626297768063, - "sentence_nr": 3 + "score": 0.40306183496110326, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.40976234193505356, - "sentence_nr": 3 + "score": 0.6065077241830509, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.5806197937310393, - "sentence_nr": 3 + "score": 0.3756985486608933, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.7346706700987636, - "sentence_nr": 3 + "score": 0.5991443770283833, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 + "score": 0.3437925129268647, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 + "score": 0.5496158439811546, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.4855332614117322, - "sentence_nr": 3 + "score": 0.4457795438900481, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5299556742893647, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.19940445989088915, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.43164821827950184, - "sentence_nr": 3 + "score": 0.6608358312257032, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.2423441824135159, - "sentence_nr": 3 + "score": 0.4389167617930115, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.4429509373913047, - "sentence_nr": 3 + "score": 0.6283965584123504, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.456804148784435, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 + "score": 0.6653973164805368, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.6064630666233242, - "sentence_nr": 3 + "score": 0.4544549777519972, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.6752055521830945, - "sentence_nr": 3 + "score": 0.6588011478075102, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.5357110024227318, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.6365941772753647, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.4720654627116666, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6517469394467796, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.14790264259417688, - "sentence_nr": 3 + "score": 0.13081443497119305, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.27159767590045303, - "sentence_nr": 3 + "score": 0.417733523030983, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.4751132438608344, - "sentence_nr": 3 + "score": 0.5333753443479871, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.6849386986272349, - "sentence_nr": 3 + "score": 0.6915993702819169, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.08635800047213174, - "sentence_nr": 3 + "score": 0.4824471894538444, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.218109371254876, - "sentence_nr": 3 + "score": 0.6756807439055712, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 + "score": 0.4892530408936975, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.3682311523733465, - "sentence_nr": 3 + "score": 0.6697286007212407, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.11739521786077453, - "sentence_nr": 3 + "score": 0.5009456904181451, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.22090491782919655, - "sentence_nr": 3 + "score": 0.6893719644090858, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 3 + "score": 0.5011800954736271, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.280413108453108, - "sentence_nr": 3 + "score": 0.6882325337556615, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.11547518641061649, - "sentence_nr": 3 + "score": 0.42451113499289145, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.25945846414490087, - "sentence_nr": 3 + "score": 0.6326418045965277, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.20233074088759792, - "sentence_nr": 3 + "score": 0.47347369701789205, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.3746629492952356, - "sentence_nr": 3 + "score": 0.681786235656136, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.40214612768560637, - "sentence_nr": 3 + "score": 0.4390212047669306, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.45128424593135114, - "sentence_nr": 3 + "score": 0.6475219955365487, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.37284875432797243, - "sentence_nr": 3 + "score": 0.5043550869731553, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.44888401040760956, - "sentence_nr": 3 + "score": 0.6689462373151898, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.0925329498915617, - "sentence_nr": 3 + "score": 0.28630516999083483, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.2110486160692096, - "sentence_nr": 3 + "score": 0.4859658293338903, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 3 + "score": 0.4628513442989428, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "score": 0.6651586361790265, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 3 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.12453389344594705, - "sentence_nr": 3 + "score": 0.004953764861294584, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.141543757252386, - "sentence_nr": 3 + "score": 0.48578120610890896, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.2594145364221844, - "sentence_nr": 3 + "score": 0.6418878687312928, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 + "score": 0.23363375253301555, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 + "score": 0.4539275409654266, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.25383339228798274, - "sentence_nr": 3 + "score": 0.498687604330117, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.45896379476820603, - "sentence_nr": 3 + "score": 0.6495577010231699, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.17200767571780612, - "sentence_nr": 3 + "score": 0.18273944860385094, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.3723150838362789, - "sentence_nr": 3 + "score": 0.44261865187418153, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.1477219991186121, - "sentence_nr": 3 + "score": 0.1921021633645501, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.28685201698226354, - "sentence_nr": 3 + "score": 0.41090634933708026, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.3254455687469726, - "sentence_nr": 3 + "score": 0.2019984490972421, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.4474512036484817, - "sentence_nr": 3 + "score": 0.42969616197156246, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.23887527917609022, - "sentence_nr": 3 + "score": 0.24058995161649158, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.4120359948636439, - "sentence_nr": 3 + "score": 0.4522509933949415, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.23560673823249806, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 + "score": 0.45057120279075363, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 + "score": 0.34777225435927045, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "score": 0.5603739447290761, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.3556521383601747, - "sentence_nr": 3 + "score": 0.0343688963868873, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.594830811413066, - "sentence_nr": 3 + "score": 0.2491787368704391, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.21629114799587432, - "sentence_nr": 3 + "score": 0.2488577037192601, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.3542320138389837, - "sentence_nr": 3 + "score": 0.4700612059850866, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 + "score": 0.10246901021115776, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.27405612859390877, - "sentence_nr": 3 + "score": 0.26380432026626405, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.4639958592456083, - "sentence_nr": 3 + "score": 0.4634992426765033, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.13004800471424346, - "sentence_nr": 3 + "score": 0.30519601919508343, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.28217142159025543, - "sentence_nr": 3 + "score": 0.48440897375540304, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.37821486365532614, - "sentence_nr": 3 + "score": 0.3897372020625521, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.4718665834023439, - "sentence_nr": 3 + "score": 0.5520780806464591, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.3699382260470039, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4032851361478274, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.45167594566243024, - "sentence_nr": 3 + "score": 0.2153742037697241, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.5169677927619225, - "sentence_nr": 3 + "score": 0.4581737688885401, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 + "score": 0.22583314893598608, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.3780009826926042, - "sentence_nr": 3 + "score": 0.4634872123809323, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.3925121365052661, - "sentence_nr": 3 + "score": 0.2000682107464079, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.47788592802001717, - "sentence_nr": 3 + "score": 0.42213947952783815, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.1423412184218882, - "sentence_nr": 3 + "score": 0.22347194598034506, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.2596718628394258, - "sentence_nr": 3 + "score": 0.4665650707763161, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.3572188192648703, - "sentence_nr": 3 + "score": 0.32447211622666056, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.45381175288762937, - "sentence_nr": 3 + "score": 0.5077210804141314, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.07425055521504613, - "sentence_nr": 3 + "score": 0.34059658886569716, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.18122341046764998, - "sentence_nr": 3 + "score": 0.5263364808620599, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.1978585723043446, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.3527599187160617, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.2523019529343173, - "sentence_nr": 3 + "score": 0.31145364701208733, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.4406369072888057, - "sentence_nr": 3 + "score": 0.5087911718200273, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.41072675483179805, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.5635589150380774, - "sentence_nr": 3 + "score": 0.004236700409670164, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.3883375900135818, - "sentence_nr": 3 + "score": 0.3711271620335373, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.4643731845106876, - "sentence_nr": 3 + "score": 0.5606811328336353, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 + "score": 0.34655442187135127, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.7123666275414222, - "sentence_nr": 3 + "score": 0.6023567722379627, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.2775751476798985, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.5467407840471017, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.3372953649368346, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.5482505380106469, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.3598041249522345, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.5672133517600307, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.275788082902897, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2246029757863831, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5773502691896258, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.7999099314029202, - "sentence_nr": 4 + "score": 0.4682894376569175, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.25810978038865107, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.6417603075499863, - "sentence_nr": 4 + "score": 0.5114862976334219, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.7825422900366437, - "sentence_nr": 4 + "score": 0.4459565225038376, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.8503171627677965, - "sentence_nr": 4 + "score": 0.6647029994959, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.46443675322751826, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.6512173868183774, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.37709297891717664, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.6881502501430368, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 4 + "score": 0.4244950970711203, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 + "score": 0.6318443095842109, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.28612208859224425, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.20955561269928308, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.451252540938088, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2453787991485662, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.4099668999237371, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2988083057066004, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.4973008562914265, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.28528905353056333, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.4885812318466243, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.35797362976091973, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.5581982021478125, - "sentence_nr": 4 + "score": 0.5379266632230616, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2606045000988204, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.4525313379099324, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.32155751243171055, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.652013511062815, - "sentence_nr": 4 + "score": 0.5282954234137397, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4400355050484472, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.5988144881332053, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4187059279293422, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.5806611969643932, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.42286596174824126, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.5934357258501683, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.27694098293799824, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.36560991595112396, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.537072365457506, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3091536050099401, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.5881561248602009, - "sentence_nr": 4 + "score": 0.504963808447426, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.35120509380099896, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.40435987083533204, - "sentence_nr": 4 + "score": 0.5127991322787522, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2935204022158406, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.39858613265631837, - "sentence_nr": 4 + "score": 0.4867597973247361, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.36394690002866714, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 + "score": 0.5567484827579814, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.278704088378991, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.47160616105623426, - "sentence_nr": 4 + "score": 0.4868935860000992, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5309982646782259, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 + "score": 0.28644027312006637, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.6151179643430991, - "sentence_nr": 4 + "score": 0.4643839364819269, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.5183632566399202, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.41238100267720657, - "sentence_nr": 4 + "score": 0.6705567848900439, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.39892980454447485, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.39909989628767284, - "sentence_nr": 4 + "score": 0.5839375286411709, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.17181529671327242, - "sentence_nr": 4 + "score": 0.20065106893244083, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.5293474685884572, - "sentence_nr": 4 + "score": 0.41485356318318073, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.23119533406164058, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.4429196299668147, - "sentence_nr": 4 + "score": 0.4549622022003173, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.1501956901694662, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 + "score": 0.3435352939078531, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.30692705311222085, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.5802683403568892, - "sentence_nr": 4 + "score": 0.5531231299653412, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.3237722713145643, - "sentence_nr": 4 + "score": 0.41613344165345995, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.7426638026175545, - "sentence_nr": 4 + "score": 0.5740077532098984, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.49132868804528823, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.49342175914364256, - "sentence_nr": 4 + "score": 0.6524450166860349, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.2929684584911775, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.4352628824108997, - "sentence_nr": 4 + "score": 0.5038324436049059, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3414171640083141, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.5116862201536014, - "sentence_nr": 4 + "score": 0.5306256202657124, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3858101625283812, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.33471616336068044, - "sentence_nr": 4 + "score": 0.5618844078335644, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.2865612242047131, - "sentence_nr": 4 + "score": 0.3116287423376191, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.6433813179203622, - "sentence_nr": 4 + "score": 0.4893092447918963, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.4353732493964906, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.3598792258309727, - "sentence_nr": 4 + "score": 0.6078594152630662, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.40983351958195835, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.5125809225356253, - "sentence_nr": 4 + "score": 0.5980339788644404, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.27523578634783447, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.5539920925426138, - "sentence_nr": 4 + "score": 0.501243321339511, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.40840960406849836, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.5226572946586268, - "sentence_nr": 4 + "score": 0.5662632887734669, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.047201037160775325, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.5073395824633415, - "sentence_nr": 4 + "score": 0.12237706077959995, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.40939284504147777, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.29382595610734974, - "sentence_nr": 4 + "score": 0.5645111896180985, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 + "score": 0.49428478171113605, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.5773664661124461, - "sentence_nr": 4 + "score": 0.6360862650323953, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.27106784138456536, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.458287745564531, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.4034224234291925, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.5736798834726872, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.7013062757071812, - "sentence_nr": 5 + "score": 0.47117590712234436, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.9303769449292738, - "sentence_nr": 5 + "score": 0.6157183058759933, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.4840329060094462, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2381658499765768, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.9063898435384111, - "sentence_nr": 5 + "score": 0.6570537611908611, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.5309354663044072, - "sentence_nr": 5 + "score": 0.33677049851999397, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.6756014232714684, - "sentence_nr": 5 + "score": 0.5762640586619034, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.5481366186143743, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.65502698375226, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.4529852871970908, - "sentence_nr": 5 + "score": 0.48936688255103167, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.6941474239078328, - "sentence_nr": 5 + "score": 0.6232884959088987, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 + "score": 0.2935294310015522, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 + "score": 0.4403308077637572, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.49864013450084044, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.6473028953530363, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.15688445463098402, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.9457416090031758, - "sentence_nr": 5 + "score": 0.4711705838157902, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.9892952933418456, - "sentence_nr": 5 + "score": 0.6028678286611538, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.3109058809229358, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.5045951829816013, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.2822871796543221, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.5208915029538709, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.1077205146963877, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.428338145564396, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.09543667505391068, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.40518998504409354, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 + "score": 0.14087022592589463, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 + "score": 0.42752370954120755, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 + "score": 0.36576182289875453, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.7543919667018285, - "sentence_nr": 5 + "score": 0.5569403582137159, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.5738396574789242, - "sentence_nr": 5 + "score": 0.3304174876425892, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.798357133373606, - "sentence_nr": 5 + "score": 0.5249852702194517, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.5738396574789242, - "sentence_nr": 5 + "score": 0.28983869034423043, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.798357133373606, - "sentence_nr": 5 + "score": 0.4975732770770436, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.28060741458208943, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.496580338229036, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 + "score": 0.07383026958055552, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 + "score": 0.18582113429299857, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 + "score": 0.32814442346427775, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7543919667018285, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.5149345446415335, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.40276720463657734, - "sentence_nr": 5 + "score": 0.2556346494160282, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.6529271690805427, - "sentence_nr": 5 + "score": 0.4538035440310274, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.30188353873287377, - "sentence_nr": 5 + "score": 0.4102082155233312, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.6086565367747951, - "sentence_nr": 5 + "score": 0.5474039587505726, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.6026286934891149, - "sentence_nr": 5 + "score": 0.22327767951697297, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.8025775976044891, - "sentence_nr": 5 + "score": 0.4063556880747369, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 + "score": 0.26234851988380015, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 + "score": 0.4686295191568941, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 + "score": 0.22472032138500259, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 + "score": 0.4363253004030211, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.2981792160679168, - "sentence_nr": 5 + "score": 0.14283509516492696, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.5788026000794341, - "sentence_nr": 5 + "score": 0.39080133039424786, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.7012294787544179, - "sentence_nr": 5 + "score": 0.2792735917259789, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.8478115719875968, - "sentence_nr": 5 + "score": 0.4652543566408097, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.21690365808279138, - "sentence_nr": 5 + "score": 0.3284145915841146, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.5384773678665918, - "sentence_nr": 5 + "score": 0.4777770768413136, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.25711386542134795, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.6088853751738869, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.5695988432761473, - "sentence_nr": 5 + "score": 0.32565974985390567, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.7516103467926585, - "sentence_nr": 5 + "score": 0.5028101514284876, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 + "score": 0.29187222866434104, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.38091370416670794, - "sentence_nr": 5 + "score": 0.20649207653440943, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.6438225861756911, - "sentence_nr": 5 + "score": 0.45559573554011507, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.2392120773016637, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.7202697992734389, - "sentence_nr": 5 + "score": 0.440445343487272, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 + "score": 0.22424453668984448, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 + "score": 0.41637444107955873, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.5309354663044072, - "sentence_nr": 5 + "score": 0.2572733200413211, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.6990707992725005, - "sentence_nr": 5 + "score": 0.4520014138562526, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.1673872929477023, - "sentence_nr": 5 + "score": 0.23112644289004342, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.4506667273103674, - "sentence_nr": 5 + "score": 0.4228120750850924, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 + "score": 0.1733396766438206, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 + "score": 0.37157614360073693, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.3416581331218724, - "sentence_nr": 5 + "score": 0.3070946890889356, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.6578570934289981, - "sentence_nr": 5 + "score": 0.5150001444865586, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.4797543511401896, - "sentence_nr": 5 + "score": 0.33726552749982586, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.7240781310560407, - "sentence_nr": 5 + "score": 0.5166923315613857, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.6401876410870359, - "sentence_nr": 5 + "score": 0.34077616827498786, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.7526484951226097, - "sentence_nr": 5 + "score": 0.5278331664063162, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.33057129676705455, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.5669225664686625, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 + "score": 0.31260047665100127, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 + "score": 0.48667320069984316, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 + "score": 0.05624172669013078, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 + "score": 0.3317185957958262, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.2624355454690498, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.30350690419450826, - "sentence_nr": 5 + "score": 0.48887901649623144, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.569133886912883, - "sentence_nr": 5 + "score": 0.40487199173556226, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.6834516951654327, - "sentence_nr": 5 + "score": 0.5637204315528265, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.40165053057541866, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.5837756195280097, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.40311197004738203, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.5788525108956781, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.30912713581280643, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.5376618148723133, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.22800071662764984, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.3263040636562357, - "sentence_nr": 6 + "score": 0.4763467106828393, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.2997057270104923, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.5028660357670663, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.3874069559631556, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.5570550964093942, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.45074681913051867, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.5985843659278748, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.2697752741359869, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.46086885667795485, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.3442651325185116, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.534774838547693, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.13496104417905996, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.36516261117337495, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.5425947356911068, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.5108628809804742, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6929396211173784, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.49872195941208947, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 + "score": 0.6907054265096231, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 + "score": 0.45313578977486535, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 + "score": 0.6160993561903745, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.3751840463233443, - "sentence_nr": 6 + "score": 0.37640646218183, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.6279894552667558, - "sentence_nr": 6 + "score": 0.5429063669356702, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.19268479640608693, - "sentence_nr": 6 + "score": 0.4331983607416391, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.551397074868541, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17470942957770763, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.5403400891349619, - "sentence_nr": 6 + "score": 0.601662300924314, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.4485994475252126, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.5763410052067085, - "sentence_nr": 6 + "score": 0.6258984728025891, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.37392149096896676, - "sentence_nr": 6 + "score": 0.5793415656031259, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.6665214662145853, - "sentence_nr": 6 + "score": 0.7153451394318217, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.5248291448433852, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 + "score": 0.6920934053021797, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.33244322003055665, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.5763410052067085, - "sentence_nr": 6 + "score": 0.5420222318771111, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.425742897803471, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.5460240376042262, - "sentence_nr": 6 + "score": 0.6292455373063424, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.24343304284910333, - "sentence_nr": 6 + "score": 0.0072148746031117554, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.6275577931282961, - "sentence_nr": 6 + "score": 0.09496152255049971, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.28571962561926445, - "sentence_nr": 6 + "score": 0.4803498024083505, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.6431872581462166, - "sentence_nr": 6 + "score": 0.5963395991179793, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 + "score": 0.3335262554878992, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 + "score": 0.5258955094447381, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 + "score": 0.3411271681324882, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 + "score": 0.5323123267352375, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.2651736858432996, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.4491383344282561, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.4547900039222725, - "sentence_nr": 6 + "score": 0.3082082660061424, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.6541971428810075, - "sentence_nr": 6 + "score": 0.5077609645706764, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 + "score": 0.32151064813737534, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 + "score": 0.5058205933378546, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 + "score": 0.30384210838236353, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 + "score": 0.49573162353290035, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 + "score": 0.4140944157226165, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.6824395076981005, - "sentence_nr": 6 + "score": 0.6110635706148037, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.38305978177479755, - "sentence_nr": 6 + "score": 0.3007845437586152, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.6061131723054572, - "sentence_nr": 6 + "score": 0.4720170373660879, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.35069370820755275, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.5735580981959628, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.7289444696770301, - "sentence_nr": 6 + "score": 0.0014196479273140264, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 + "score": 0.27579736884967815, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 + "score": 0.44560520221877703, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 + "score": 0.37742688647401873, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 + "score": 0.5674314405993244, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.3684981984538114, - "sentence_nr": 6 + "score": 0.24828430598240606, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.5606332518476288, - "sentence_nr": 6 + "score": 0.5078550622606068, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.4536404448264584, - "sentence_nr": 6 + "score": 0.34545319957597864, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.8020827133708689, - "sentence_nr": 6 + "score": 0.5727052860304503, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.4545091839935173, - "sentence_nr": 6 + "score": 0.4256604038587669, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.7166050399790445, - "sentence_nr": 6 + "score": 0.595254482532169, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.3370129264673147, - "sentence_nr": 6 + "score": 0.35948829980203323, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.7096874943799061, - "sentence_nr": 6 + "score": 0.5639910704472698, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.1624355752882384, - "sentence_nr": 6 + "score": 0.3625286446151028, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.4952968469712617, - "sentence_nr": 6 + "score": 0.6148737881972042, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 + "score": 0.4821945698340569, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.7807505267551733, - "sentence_nr": 6 + "score": 0.6555631364280885, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.5595205105615875, - "sentence_nr": 6 + "score": 0.4519603667438429, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.8322210048001876, - "sentence_nr": 6 + "score": 0.6009987666643928, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.3610544299180199, - "sentence_nr": 6 + "score": 0.5542271267412462, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.49125115898082056, - "sentence_nr": 6 + "score": 0.7275049499508799, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 + "score": 0.327910616954487, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 + "score": 0.5832612672351287, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.8578928092681435, - "sentence_nr": 6 + "score": 0.5350666712285949, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.9422733087334002, - "sentence_nr": 6 + "score": 0.6501051146578934, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 + "score": 0.339818403012025, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.42818224355402373, - "sentence_nr": 6 + "score": 0.5156759219303986, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.42105372680687736, - "sentence_nr": 6 + "score": 0.4381699512774638, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7001171094008295, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1582866049832572, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.34487142413575794, - "sentence_nr": 7 + "score": 0.5887410281104106, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.15521606028436608, - "sentence_nr": 7 + "score": 0.38968867962607934, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.37645329404497957, - "sentence_nr": 7 + "score": 0.5581403039390647, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.12620429887108936, - "sentence_nr": 7 + "score": 0.44378279372807367, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.35580703793872603, - "sentence_nr": 7 + "score": 0.5825113284353328, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.12872220631084524, - "sentence_nr": 7 + "score": 0.34093060419986554, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.33602633953270183, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.042121062429802174, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.14281404499176092, - "sentence_nr": 7 + "score": 0.5050414552947896, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.042575418285137674, - "sentence_nr": 7 + "score": 0.32155220285195785, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.05173688961049459, - "sentence_nr": 7 + "score": 0.5502401579986564, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.3045613775157565, - "sentence_nr": 7 + "score": 0.48871893597517396, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.5275070803493389, - "sentence_nr": 7 + "score": 0.6448535407289147, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.2734283774929853, - "sentence_nr": 7 + "score": 0.4647350187265495, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.5252214120598302, - "sentence_nr": 7 + "score": 0.6141693179612359, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.10203846572325131, - "sentence_nr": 7 + "score": 0.2572787263311883, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.33381153680096753, - "sentence_nr": 7 + "score": 0.4597049841535362, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 7 + "score": 0.38621174932063007, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "score": 0.5629586598269498, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.014935758919429663, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.08106107745254391, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.044304867337633724, - "sentence_nr": 7 + "score": 0.431633629801714, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.20806974344498103, - "sentence_nr": 7 + "score": 0.5611890334237722, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.08860973467526746, - "sentence_nr": 7 + "score": 0.12843096555088776, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.3178004360288637, - "sentence_nr": 7 + "score": 0.3356201430079791, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.15268019045355535, - "sentence_nr": 7 + "score": 0.17727473966694943, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.41028757620299977, - "sentence_nr": 7 + "score": 0.3475071694578125, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.030860166165309233, - "sentence_nr": 7 + "score": 0.20451416608402828, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.1100250143829584, - "sentence_nr": 7 + "score": 0.38185285396290036, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.21255327712152144, - "sentence_nr": 7 + "score": 0.24678030799496634, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.43272151570555034, - "sentence_nr": 7 + "score": 0.4463603005685723, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.01486609147288197, - "sentence_nr": 7 + "score": 0.17736142488062245, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.13893773605583024, - "sentence_nr": 7 + "score": 0.3654025502565916, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.06609667473412645, - "sentence_nr": 7 + "score": 0.14222939605129875, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.26197209338359717, - "sentence_nr": 7 + "score": 0.32212719342865237, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.26064517697298795, - "sentence_nr": 7 + "score": 0.2562205755075293, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.5092206110218525, - "sentence_nr": 7 + "score": 0.46066225689165846, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.1507980395794452, - "sentence_nr": 7 + "score": 0.24809323900653618, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.4306039128585424, - "sentence_nr": 7 + "score": 0.455254011012136, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.1438459189500836, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.30693371625402605, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.0979038733644086, - "sentence_nr": 7 + "score": 0.22798424876104878, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.30211704738953993, - "sentence_nr": 7 + "score": 0.4315883077530936, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.009624974244068071, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.07318255686027669, - "sentence_nr": 7 + "score": 0.0013506212857914643, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.043420474648595074, - "sentence_nr": 7 + "score": 0.22750547588410633, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.2884095690753619, - "sentence_nr": 7 + "score": 0.4160796302144522, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.13868172938464635, - "sentence_nr": 7 + "score": 0.3536676112393946, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.3094469764260441, - "sentence_nr": 7 + "score": 0.5252283198216768, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.11091252683001185, - "sentence_nr": 7 + "score": 0.5494025263062274, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.26607634610445896, - "sentence_nr": 7 + "score": 0.6860161543947312, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.18154954789336694, - "sentence_nr": 7 + "score": 0.2636405082687104, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.4557483776072868, - "sentence_nr": 7 + "score": 0.5072009470421238, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.1381751568911733, - "sentence_nr": 7 + "score": 0.3994138413590059, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.3121557499162649, - "sentence_nr": 7 + "score": 0.6335699196668345, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.20065115069964384, - "sentence_nr": 7 + "score": 0.32440820201863096, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.4084885616013531, - "sentence_nr": 7 + "score": 0.5816024759666973, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.12291219097556666, - "sentence_nr": 7 + "score": 0.30860365223174097, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.3448002180666873, - "sentence_nr": 7 + "score": 0.5323991480984563, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.20608572305725564, - "sentence_nr": 7 + "score": 0.5275588446482796, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.4704943905570542, - "sentence_nr": 7 + "score": 0.6824121095569455, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.085416483900781, - "sentence_nr": 7 + "score": 0.4698824517223119, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.2825804066750608, - "sentence_nr": 7 + "score": 0.6509974368827985, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.11452508920842025, - "sentence_nr": 7 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.3212742401272785, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.15478222669012726, - "sentence_nr": 7 + "score": 0.465943811426769, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.3550584759508654, - "sentence_nr": 7 + "score": 0.646929348297808, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.07875433150726119, - "sentence_nr": 7 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.2638954513805452, - "sentence_nr": 7 + "score": 0.001973164956590371, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.10734088848154077, - "sentence_nr": 7 + "score": 0.39626726411474644, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.33946796348247366, - "sentence_nr": 7 + "score": 0.5928013371853409, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.17795920517030017, - "sentence_nr": 7 + "score": 0.2348553453946444, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.41862955401967455, - "sentence_nr": 7 + "score": 0.4891959123914518, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.19388048412249795, - "sentence_nr": 7 + "score": 0.4222656487192343, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.44361702376789247, - "sentence_nr": 7 + "score": 0.6421614792137705, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.1237012344369667, - "sentence_nr": 7 + "score": 0.20174173621464261, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.33331866832253354, - "sentence_nr": 7 + "score": 0.5179166118048267, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.15589802574348086, - "sentence_nr": 7 + "score": 0.5116634146141776, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.37894206802233305, - "sentence_nr": 7 + "score": 0.6950231685488834, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.1948502778967486, - "sentence_nr": 7 + "score": 0.27720246067551324, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.35525815981538433, - "sentence_nr": 7 + "score": 0.543353961410956, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.1618333627385132, - "sentence_nr": 7 + "score": 0.1658317981046275, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.3458746996740858, - "sentence_nr": 7 + "score": 0.4396479745504188, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.17393111207515277, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.39042812195808824, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.19064689695123957, - "sentence_nr": 7 + "score": 0.5076222240986388, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.36954921822756504, - "sentence_nr": 7 + "score": 0.6899856343139605, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.1785851272602057, - "sentence_nr": 7 + "score": 0.42072143291659103, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.3800733399524004, - "sentence_nr": 7 + "score": 0.6265956117333142, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.20113943179758872, - "sentence_nr": 7 + "score": 0.00591195237335994, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.5054929215592371, - "sentence_nr": 7 + "score": 0.15760341237876357, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.07088281524771703, - "sentence_nr": 7 + "score": 0.5079711118438801, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.1725752257112697, - "sentence_nr": 7 + "score": 0.6900890595896133, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.11901413329120636, - "sentence_nr": 7 + "score": 0.07446712399912313, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.2908877283991857, - "sentence_nr": 7 + "score": 0.30485244492635144, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.15593857496482408, - "sentence_nr": 7 + "score": 0.3394216003840941, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.3832822126692406, - "sentence_nr": 7 + "score": 0.5399361239256494, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.21107720643690867, - "sentence_nr": 7 + "score": 0.3461114139111442, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.43911506176829573, - "sentence_nr": 7 + "score": 0.5537111972654953, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.13410301071131794, - "sentence_nr": 8 + "score": 0.353179331599201, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.3942932268034351, - "sentence_nr": 8 + "score": 0.5558881348090785, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.30677064886592076, - "sentence_nr": 8 + "score": 0.33522833358360765, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.5308555945242818, - "sentence_nr": 8 + "score": 0.534195929930943, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.1327526847508867, - "sentence_nr": 8 + "score": 0.3279338213872338, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.37850602486495205, - "sentence_nr": 8 + "score": 0.5206722319482356, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.18405035438430847, - "sentence_nr": 8 + "score": 0.1900249500296748, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4142901090120915, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.061826017721563604, - "sentence_nr": 8 + "score": 0.4570054063295732, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.08852681798207009, - "sentence_nr": 8 + "score": 0.389868366744335, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.3583179111355935, - "sentence_nr": 8 + "score": 0.6181891240638018, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.3857436691295343, - "sentence_nr": 8 + "score": 0.30595231029570097, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.5750224388123065, - "sentence_nr": 8 + "score": 0.5076226900210099, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.5920893212447781, - "sentence_nr": 8 + "score": 0.37858398735109683, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.6925021521158101, - "sentence_nr": 8 + "score": 0.5535757116038853, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.22478613858269392, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.44348101018104913, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.3201978307646018, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.5281184078781382, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.1627842130495941, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.183687049781416, - "sentence_nr": 8 + "score": 0.3513085830979839, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.351911486970854, - "sentence_nr": 8 + "score": 0.30576442771176066, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.5181825846579515, - "sentence_nr": 8 + "score": 0.5175418593642837, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.17328174803055044, - "sentence_nr": 8 + "score": 0.3734491516745214, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.3178268797869574, - "sentence_nr": 8 + "score": 0.5499493819792871, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.286608441075188, - "sentence_nr": 8 + "score": 0.4397415106513502, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.4579283646292802, - "sentence_nr": 8 + "score": 0.5907735810868658, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.25861130592298187, - "sentence_nr": 8 + "score": 0.31308824228412185, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.39452644092432093, - "sentence_nr": 8 + "score": 0.4950165423717857, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.20379250618355427, - "sentence_nr": 8 + "score": 0.28646584019908145, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.41085414309816914, - "sentence_nr": 8 + "score": 0.4746124656486252, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.310679343206099, - "sentence_nr": 8 + "score": 0.21240535233702176, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.4471183729584148, - "sentence_nr": 8 + "score": 0.3985917300395283, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.2334787866969297, - "sentence_nr": 8 + "score": 0.18416226663843008, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.3621517589760531, - "sentence_nr": 8 + "score": 0.409963740738593, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.5866873582151947, - "sentence_nr": 8 + "score": 0.5597983233407279, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.46269559069048716, - "sentence_nr": 8 + "score": 0.030041173262958625, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.46872641361415845, - "sentence_nr": 8 + "score": 0.16461275738712375, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.10434360980785336, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.3012789660952507, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 8 + "score": 0.28710736118585223, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 + "score": 0.47019373110040275, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 8 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.13835317113453516, - "sentence_nr": 8 + "score": 0.02574148824389794, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.16343842313572918, - "sentence_nr": 8 + "score": 0.2111955699760469, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.3986641525285075, - "sentence_nr": 8 + "score": 0.428831679677381, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 + "score": 0.3612717557348476, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 + "score": 0.5558371668340614, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.1690979933029136, - "sentence_nr": 8 + "score": 0.46890796443667687, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.3751861276375209, - "sentence_nr": 8 + "score": 0.6445795993451092, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.2840563956846642, - "sentence_nr": 8 + "score": 0.30704694388456133, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.5110250591004448, - "sentence_nr": 8 + "score": 0.5217468869740803, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.19920413481788912, - "sentence_nr": 8 + "score": 0.3675770737978913, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.42537796926163113, - "sentence_nr": 8 + "score": 0.5833872029429698, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.20401796878756984, - "sentence_nr": 8 + "score": 0.2381080412543041, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.43317630453631556, - "sentence_nr": 8 + "score": 0.4678770958208047, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.2044887070217883, - "sentence_nr": 8 + "score": 0.12508074021419405, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.38471585132587544, - "sentence_nr": 8 + "score": 0.39088781423976093, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.2980504190448601, - "sentence_nr": 8 + "score": 0.4481437122587742, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.5101268920225042, - "sentence_nr": 8 + "score": 0.6475959138561268, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.9436043261706615, - "sentence_nr": 8 + "score": 0.3699375619378516, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.9880191679951993, - "sentence_nr": 8 + "score": 0.5932397042974766, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.18831933500600306, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.4318025704181776, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.21544027588567594, - "sentence_nr": 8 + "score": 0.4346391355101555, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.5040038440508637, - "sentence_nr": 8 + "score": 0.648783727613815, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.26970223719007375, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5172978597562362, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.30630098078522544, - "sentence_nr": 8 + "score": 0.24647819790998704, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.5439056051092116, - "sentence_nr": 8 + "score": 0.4673628408395248, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.19850842371858787, - "sentence_nr": 8 + "score": 0.35045670972629744, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.43584341835040474, - "sentence_nr": 8 + "score": 0.564796827816794, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.20170335119323748, - "sentence_nr": 8 + "score": 0.41661325369918395, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.3541251997977811, - "sentence_nr": 8 + "score": 0.587715140145979, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.10508106635796587, - "sentence_nr": 8 + "score": 0.3937759330018993, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.3182774828667731, - "sentence_nr": 8 + "score": 0.5961746226614889, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.2309552734743087, - "sentence_nr": 8 + "score": 0.4088174428659509, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.43975656978777905, - "sentence_nr": 8 + "score": 0.5924365007019256, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.23530033724858213, - "sentence_nr": 8 + "score": 0.3565944577029545, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.46208607300298377, - "sentence_nr": 8 + "score": 0.5419443217291802, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.37284027455688556, - "sentence_nr": 8 + "score": 0.5003903156428934, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.5528347504734102, - "sentence_nr": 8 + "score": 0.6591036004593714, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.2887308472548599, - "sentence_nr": 8 + "score": 0.4075013846828396, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.38846174119508314, - "sentence_nr": 8 + "score": 0.609574810318951, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.15487293534817623, - "sentence_nr": 8 + "score": 0.48070461563699834, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.39293494862736383, - "sentence_nr": 8 + "score": 0.6555925126677848, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.21741853044139284, - "sentence_nr": 8 + "score": 0.46685213488332356, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.3535910166292039, - "sentence_nr": 8 + "score": 0.6452685695102438, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.33626819961829335, - "sentence_nr": 8 + "score": 0.17827499805988958, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.5466581859383387, - "sentence_nr": 8 + "score": 0.4226865520698467, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.32000331642122953, - "sentence_nr": 8 + "score": 0.509712898465703, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.5480591855923784, - "sentence_nr": 8 + "score": 0.6788015977861386, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.21132630077912357, - "sentence_nr": 8 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.4175670766052166, - "sentence_nr": 8 + "score": 0.0701361033487376, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.13108369255325433, - "sentence_nr": 8 + "score": 0.4246487921338825, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.3929302741911199, - "sentence_nr": 8 + "score": 0.6150058842025391, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.20174045447955946, - "sentence_nr": 8 + "score": 0.16285971091078436, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.33729298835089516, - "sentence_nr": 8 + "score": 0.43885470392891923, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.20972571494011877, - "sentence_nr": 9 + "score": 0.37339369029886144, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.395894071208527, - "sentence_nr": 9 + "score": 0.5432112723704581, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.23751632756038837, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.35369375385786006, - "sentence_nr": 9 + "score": 0.4414396968637268, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.13087682931309413, - "sentence_nr": 9 + "score": 0.23132615410621146, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.19462952976787054, - "sentence_nr": 9 + "score": 0.43514359502154976, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.1741525384512767, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.013538497707846785, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1570208067577934, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.4113045280468524, - "sentence_nr": 9 + "score": 0.45255374179790936, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.15082713742973322, - "sentence_nr": 9 + "score": 0.11702528754872281, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.3965911699770542, - "sentence_nr": 9 + "score": 0.35961444717857005, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.15471428129658016, - "sentence_nr": 9 + "score": 0.38197969936092163, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.4580211317461481, - "sentence_nr": 9 + "score": 0.5508810570148596, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.18928475425929295, - "sentence_nr": 9 + "score": 0.2697061457592779, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.4916060435820526, - "sentence_nr": 9 + "score": 0.45539079735897503, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.21940429389247643, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.4343280866601455, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 9 + "score": 0.28350073467974646, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 + "score": 0.4913615840203272, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.1824401863423467, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.36709433185688595, - "sentence_nr": 9 + "score": 0.0012419274714356686, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.3377854698776805, - "sentence_nr": 9 + "score": 0.2274056095104063, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.521201229892482, - "sentence_nr": 9 + "score": 0.45103469245105887, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.12475846123062707, - "sentence_nr": 9 + "score": 0.21791041776703116, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.27823340731817514, - "sentence_nr": 9 + "score": 0.44004369960566136, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.10759927692349745, - "sentence_nr": 9 + "score": 0.46773190351581395, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.21065794536310511, - "sentence_nr": 9 + "score": 0.6215065422343401, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.07843772989359644, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.1324578891826276, - "sentence_nr": 9 + "score": 0.2799135631577256, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.10136628610815898, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.08163977068875294, - "sentence_nr": 9 + "score": 0.30045915824023645, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.09047502044256338, - "sentence_nr": 9 + "score": 0.1129192185025187, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.21669141850731985, - "sentence_nr": 9 + "score": 0.3036955697945895, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.10322985794794913, - "sentence_nr": 9 + "score": 0.1146190984378276, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.24491122482530842, - "sentence_nr": 9 + "score": 0.2705636478589466, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.11809057094812304, - "sentence_nr": 9 + "score": 0.27735384192405904, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.27930342777387007, - "sentence_nr": 9 + "score": 0.5117239199116311, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.21268444697113978, - "sentence_nr": 9 + "score": 0.2773615322238364, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.3229997133764549, - "sentence_nr": 9 + "score": 0.4616718172834648, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.1475503033983142, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.22104108935973044, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.16434349396840395, - "sentence_nr": 9 + "score": 0.27918670410574553, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.28582614857210975, - "sentence_nr": 9 + "score": 0.4946319454441461, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.10085167559661873, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.23831215045289575, - "sentence_nr": 9 + "score": 0.001482799525504152, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.17543744527808774, - "sentence_nr": 9 + "score": 0.11313747467095658, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.28201016956553354, - "sentence_nr": 9 + "score": 0.31131411877123494, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.12274092982883021, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.3385513651938691, - "sentence_nr": 9 + "score": 0.2035993189596312, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.13511029141047634, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.31017716089889963, - "sentence_nr": 9 + "score": 0.36856155052346085, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.21688283061839067, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.41775824162589076, - "sentence_nr": 9 + "score": 0.17779867452221493, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.3228288840559658, - "sentence_nr": 9 + "score": 0.1806947117206154, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.18237599479708327, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.3740403511567824, - "sentence_nr": 9 + "score": 0.17199314222249618, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.12876689524369925, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.3253153379449275, - "sentence_nr": 9 + "score": 0.18992468117577804, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.119159749312327, - "sentence_nr": 9 + "score": 0.2533256789438013, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.21297942664093145, - "sentence_nr": 9 + "score": 0.46990234871498904, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.2036348471340078, - "sentence_nr": 9 + "score": 0.1218148752860121, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.3472831655579266, - "sentence_nr": 9 + "score": 0.345672681124001, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "om", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.24362353508932386, - "sentence_nr": 9 + "score": 0.17608422487251038, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.28135849152758385, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.14482189302397735, - "sentence_nr": 9 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.2913876815877049, - "sentence_nr": 9 + "score": 0.17408200912027585, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.16306957103469613, - "sentence_nr": 9 + "score": 0.2005845691509901, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 + "score": 0.054746206230409135, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.9199349282509897, - "sentence_nr": 9 + "score": 0.20233084789265965, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.20075037608245913, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.28112283847231073, - "sentence_nr": 9 + "score": 0.4313422346882818, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.1308613527030366, - "sentence_nr": 9 + "score": 0.1874333361540541, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.3063146286877558, - "sentence_nr": 9 + "score": 0.41793053821849296, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.12787395553510186, - "sentence_nr": 9 + "score": 0.3716703379730988, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.21931515993565381, - "sentence_nr": 9 + "score": 0.5800216707448408, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.1617336445898746, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.1441966459257424, - "sentence_nr": 9 + "score": 0.41103371741191813, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.2939876705913701, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.14957316612525498, - "sentence_nr": 9 + "score": 0.513822906290756, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.27675048474641756, - "sentence_nr": 9 + "score": 0.34882223508522014, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.3780460244391623, - "sentence_nr": 9 + "score": 0.5382666998696708, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.12503614625842938, - "sentence_nr": 9 + "score": 0.16815129512086885, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.20624064341134082, - "sentence_nr": 9 + "score": 0.42564250588688346, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3337393033483562, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.3368893372278425, - "sentence_nr": 9 + "score": 0.5341770443205455, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.3334625511884235, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.2961559727627133, - "sentence_nr": 9 + "score": 0.5084204520879435, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.12846497020051437, - "sentence_nr": 9 + "score": 0.43467677589817527, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.2670865602673704, - "sentence_nr": 9 + "score": 0.6164945332495145, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.28252374116432993, - "sentence_nr": 9 + "score": 0.19883793649788686, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.3549531183419122, - "sentence_nr": 9 + "score": 0.4425054083753717, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.24129507267532274, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.26128489301072644, - "sentence_nr": 9 + "score": 0.4979085300830714, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 + "score": 0.15080316480304565, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 0.2126837065505244, - "sentence_nr": 9 + "score": 0.4424628792965376, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 + "score": 0.2777349520199055, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "score": 0.5312509449503231, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 + "score": 0.20390263030337064, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "score": 0.4345747929502553, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 + "score": 0.25059044332210606, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "score": 0.5062568722858068, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 10 + "score": 0.058621811091044064, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.07149097424598219, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "score": 0.26495616287092694, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 0.7774075575820374, - "sentence_nr": 10 + "score": 0.2263798936383913, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 0.8943538262827356, - "sentence_nr": 10 + "score": 0.5010204752876567, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 + "score": 0.37144367036148984, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "score": 0.5493384036554019, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 0.18639667871924825, - "sentence_nr": 10 + "score": 0.3165559728474609, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 0.4540232715517938, - "sentence_nr": 10 + "score": 0.5621775968535822, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 0.8431643718744966, - "sentence_nr": 10 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 0.9341410275694613, - "sentence_nr": 10 + "score": 0.16207910248424867, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 + "score": 0.2656097416710467, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "score": 0.49446617915326735, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "score": 0.003921568627450981, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 + "score": 0.30060279696865555, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "score": 0.5034923934195839, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.47095916883357913, - "sentence_nr": 10 + "score": 0.17913113678266074, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.701526330557871, - "sentence_nr": 10 + "score": 0.39874837064673946, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.38260294162784475, - "sentence_nr": 10 + "score": 0.39725392028587103, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.6692418584049541, - "sentence_nr": 10 + "score": 0.5469018582137435, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.4093629115744712, - "sentence_nr": 10 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.6243156092220487, - "sentence_nr": 10 + "score": 0.3532549308527307, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.36703839483583006, - "sentence_nr": 10 + "score": 0.08124065769691519, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.6725357332891145, - "sentence_nr": 10 + "score": 0.36179606551800264, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.4322450379367835, - "sentence_nr": 10 + "score": 0.14976259597799593, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.697398762810304, - "sentence_nr": 10 + "score": 0.3842337676785057, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.41122010762096617, - "sentence_nr": 10 + "score": 0.18524922432663024, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.6697492221087861, - "sentence_nr": 10 + "score": 0.4677488632814114, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.41126318495820946, - "sentence_nr": 10 + "score": 0.2117138550702324, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7254294465493162, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4682601513034942, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.691130012325589, - "sentence_nr": 10 + "score": 0.4604849061460804, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.35334199245807973, - "sentence_nr": 10 + "score": 0.2566428979550943, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.6858610070406853, - "sentence_nr": 10 + "score": 0.472012087459169, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "az", "task": "translation", "metric": "bleu", "score": 0, - "sentence_nr": 10 + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "az", "task": "translation", "metric": "chrf", "score": 0.0, - "sentence_nr": 10 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.33061666631099795, - "sentence_nr": 10 + "score": 0.29072641495011164, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.5343307680770133, - "sentence_nr": 10 + "score": 0.5355182083154902, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.38981415389445495, - "sentence_nr": 10 + "score": 0.05513343823020891, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.665622189515994, - "sentence_nr": 10 + "score": 0.24973538251018115, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "az", "task": "translation", "metric": "bleu", - "score": 0.3223937524276847, - "sentence_nr": 10 + "score": 0.2410257388716231, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "az", "task": "translation", "metric": "chrf", - "score": 0.6719135382778884, - "sentence_nr": 10 + "score": 0.47108004929437347, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.4466645979681496, - "sentence_nr": 10 + "score": 0.3625318570509803, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.714247354760266, - "sentence_nr": 10 + "score": 0.5303179877188419, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.6233091888805312, - "sentence_nr": 10 + "score": 0.35328951154883514, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.7757111039890131, - "sentence_nr": 10 + "score": 0.5286973900062114, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.33414322499224436, - "sentence_nr": 10 + "score": 0.3816118513389601, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.7159580680193959, - "sentence_nr": 10 + "score": 0.5423632561644341, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.6620694102966999, - "sentence_nr": 10 + "score": 0.3825612041950578, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.7893416551805176, - "sentence_nr": 10 + "score": 0.5448874224723139, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.26540383860058264, - "sentence_nr": 10 + "score": 0.2914614724265088, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.51610805930355, - "sentence_nr": 10 + "score": 0.49272317726959486, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.49335830881778164, - "sentence_nr": 10 + "score": 0.31701648962901274, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.7240615166053675, - "sentence_nr": 10 + "score": 0.5369836185626417, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.4024279293206815, - "sentence_nr": 10 + "score": 0.4536812813438368, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.6798070651801875, - "sentence_nr": 10 + "score": 0.6033688389068195, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.6153147385756811, - "sentence_nr": 10 + "score": 0.4388695885558457, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.8160952378322835, - "sentence_nr": 10 + "score": 0.5988319380396017, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.6838493012537611, - "sentence_nr": 10 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.8178509424142287, - "sentence_nr": 10 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.5169198985488462, - "sentence_nr": 10 + "score": 0.3891908674355695, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.7879691803533485, - "sentence_nr": 10 + "score": 0.5638438488395793, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.5223010192696725, - "sentence_nr": 10 + "score": 0.0562284009388899, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.7442134884509299, - "sentence_nr": 10 + "score": 0.2499832582979363, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "su", "task": "translation", "metric": "bleu", - "score": 0.3885151883045163, - "sentence_nr": 10 + "score": 0.39803983519369723, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "su", "task": "translation", "metric": "chrf", - "score": 0.6763151870864087, - "sentence_nr": 10 + "score": 0.5925122761777685, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.5985488590218004, - "sentence_nr": 10 + "score": 0.4427412215990632, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.8248561222494313, - "sentence_nr": 10 + "score": 0.6222208791908107, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.37163791993879014, - "sentence_nr": 10 + "score": 0.3709917965006414, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.6792432753943116, - "sentence_nr": 10 + "score": 0.6015654773262525, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.5152630372775983, - "sentence_nr": 10 + "score": 0.3899394268518547, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.7696821316655393, - "sentence_nr": 10 + "score": 0.594841687625348, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.43521980294891405, - "sentence_nr": 10 + "score": 0.37937217700502807, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.7204319998551938, - "sentence_nr": 10 + "score": 0.585570521448987, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.46417187236805535, - "sentence_nr": 10 + "score": 0.2935817756923911, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.6653227698984816, - "sentence_nr": 10 + "score": 0.5258167932749879, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.519124054532681, - "sentence_nr": 10 + "score": 0.3069539363570848, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.7733428788002137, - "sentence_nr": 10 + "score": 0.54212160924242, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.5083170211670072, - "sentence_nr": 10 + "score": 0.3837706446662323, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.755952798269267, - "sentence_nr": 10 + "score": 0.6114841751733563, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.3161432307247198, - "sentence_nr": 10 + "score": 0.47036083421186914, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.5990810117425377, - "sentence_nr": 10 + "score": 0.6284217372117649, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.40980949787910764, - "sentence_nr": 10 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.7145653936496129, - "sentence_nr": 10 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.5770135999436572, - "sentence_nr": 10 + "score": 0.43771450361962905, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.7697316849447288, - "sentence_nr": 10 + "score": 0.6241615593947294, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.7030214416074754, - "sentence_nr": 10 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.8357829168322639, - "sentence_nr": 10 + "score": 0.002470966147763776, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", "task": "translation", "metric": "bleu", - "score": 0.49199339399396913, - "sentence_nr": 10 + "score": 0.41876701425580165, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", "task": "translation", "metric": "chrf", - "score": 0.713934780293142, - "sentence_nr": 10 + "score": 0.6023072470951277, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0.5002824356846001, - "sentence_nr": 10 + "score": 0.23150269995638142, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.7029341279811726, - "sentence_nr": 10 + "score": 0.46961020207805865, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.23712278533862596, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.029124970213905314, - "sentence_nr": 11 + "score": 0.5116265380743877, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.16820174403705807, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.1779610499753793, - "sentence_nr": 11 + "score": 0.4159468803310715, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.14411824146646438, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.05989397907532586, - "sentence_nr": 11 + "score": 0.3888201933329776, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.20772664892358625, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.13539167567510446, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.03073685498855941, - "sentence_nr": 11 + "score": 0.4530317770012902, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.08742348900087889, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.08933758530290428, - "sentence_nr": 11 + "score": 0.3483375322546142, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.22620136486770118, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.21051269871304829, - "sentence_nr": 11 + "score": 0.48605914376103504, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.17808024265318068, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.18854722085547196, - "sentence_nr": 11 + "score": 0.4325975219749186, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.1387123733773652, - "sentence_nr": 11 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 11 + "score": 0.21950096276063155, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 11 + "score": 0.4615148727332789, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 11 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.05499461839884487, - "sentence_nr": 11 + "score": 0.0014697236919459144, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.21002599862757135, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ary", "task": "translation", "metric": "chrf", - "score": 0.19978068293555115, - "sentence_nr": 11 + "score": 0.4392687670949058, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.30495379106243414, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.1388011701223677, - "sentence_nr": 11 + "score": 0.503838460756843, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.5238129782835811, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.1460389336009171, - "sentence_nr": 11 + "score": 0.6833665118503387, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.3682745409081855, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.038796252164058714, - "sentence_nr": 11 + "score": 0.5329336102063273, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.32747218107784076, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.1756002877791377, - "sentence_nr": 11 + "score": 0.54609482853432, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.28824791607535494, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.0029868578255675027, - "sentence_nr": 11 + "score": 0.5206551995745454, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.324365548882549, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.015380253532528225, - "sentence_nr": 11 + "score": 0.5907155236217757, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.5679645191995755, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.19065171436703615, - "sentence_nr": 11 + "score": 0.7028524535352202, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.4619461496096305, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.21083781655774478, - "sentence_nr": 11 + "score": 0.6211197769248664, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.1773290356139862, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.14590438247348272, - "sentence_nr": 11 + "score": 0.45665875592491983, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 11 + "score": 0.48063509995871484, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 11 + "score": 0.6455694687360541, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "uk", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 11 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.04379419293412465, - "sentence_nr": 11 + "score": 0.001876876876876877, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "uk", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.4024307797146222, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "uk", "task": "translation", "metric": "chrf", - "score": 0.15119622228734425, - "sentence_nr": 11 + "score": 0.5799489971425524, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.09425983742608171, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.21315318926996712, - "sentence_nr": 11 + "score": 0.32871133484905984, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.1500767455847696, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.16991425356152365, - "sentence_nr": 11 + "score": 0.35247049201056063, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "yo", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 11 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.22371589981083434, - "sentence_nr": 11 + "score": 0.25911153048152963, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.12339725436856788, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.20982178138488494, - "sentence_nr": 11 + "score": 0.30862388504827054, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.03683469030327237, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.20189358781069322, - "sentence_nr": 11 + "score": 0.1883651540330025, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 11 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.20261685251676126, - "sentence_nr": 11 + "score": 0.2475555473086587, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.18059014320852598, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.226729844497646, - "sentence_nr": 11 + "score": 0.4126878831445088, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.08952221293821708, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.18184342512086546, - "sentence_nr": 11 + "score": 0.3302185725289447, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "yo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.2185121523322681, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.17386106914161167, - "sentence_nr": 11 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.11089707106904065, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.08272059515141832, - "sentence_nr": 11 + "score": 0.32257370439479693, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 11 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.1814025725787457, - "sentence_nr": 11 + "score": 0.1542381010660205, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.10041397006304215, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", "task": "translation", "metric": "chrf", - "score": 0.23945930551153607, - "sentence_nr": 11 + "score": 0.267235994708103, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 11 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.20815933215961574, - "sentence_nr": 11 + "score": 0.1714827465806386, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.20461279328052204, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.09886053260067004, - "sentence_nr": 11 + "score": 0.40700264333409225, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ig", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.022279489478813384, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.14345644530149382, - "sentence_nr": 11 + "score": 0.1674529343985772, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.15383749998898477, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.19097844728039898, - "sentence_nr": 11 + "score": 0.31499362867220904, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.0294817052888944, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.08246021416977749, - "sentence_nr": 11 + "score": 0.2003567940058514, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.023921714345296125, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.18868639139421345, - "sentence_nr": 11 + "score": 0.19521241186114444, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.42252912000328696, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.20665565461558383, - "sentence_nr": 11 + "score": 0.6107603222807394, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 11 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.17764901410543646, - "sentence_nr": 11 + "score": 0.15710835559719724, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ig", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.19312651305380893, - "sentence_nr": 11 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.24624631147355844, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.21371557282714232, - "sentence_nr": 11 + "score": 0.4245128586654577, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 11 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.18854043679878274, - "sentence_nr": 11 + "score": 0.05997773337422933, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.15594400017066484, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", "task": "translation", "metric": "chrf", - "score": 0.19559831357902827, - "sentence_nr": 11 + "score": 0.35666553322400163, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 + "score": 0.4763809450534613, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.1914895496057553, - "sentence_nr": 11 + "score": 0.6797378130452167, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.6666935927206881, - "sentence_nr": 12 + "score": 0.36983487280597815, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.7957561291403441, - "sentence_nr": 12 + "score": 0.5775204256764592, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.34999116613463505, - "sentence_nr": 12 + "score": 0.3926191044336021, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.6356075517191035, - "sentence_nr": 12 + "score": 0.5853598001081626, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.48649824146709, - "sentence_nr": 12 + "score": 0.392855031610931, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.6763447333054696, - "sentence_nr": 12 + "score": 0.5596297716783123, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.367622917844187, - "sentence_nr": 12 + "score": 0.3242365809732156, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5615050712672139, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4081538556642202, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.46386216052527535, - "sentence_nr": 12 + "score": 0.49383937848408926, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.4300174433641992, - "sentence_nr": 12 + "score": 0.3757402904418656, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.5099800158255156, - "sentence_nr": 12 + "score": 0.5824540571269318, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.7963205130973803, - "sentence_nr": 12 + "score": 0.48799726436702184, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.8101688749569373, - "sentence_nr": 12 + "score": 0.6640059364745422, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.6570128212612868, - "sentence_nr": 12 + "score": 0.4657084160452196, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.6262090565616182, - "sentence_nr": 12 + "score": 0.6161150236110055, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.5866943184579982, - "sentence_nr": 12 + "score": 0.025129524427934438, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.6390393619950272, - "sentence_nr": 12 + "score": 0.18707827466681354, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 12 + "score": 0.5523722075163756, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 12 + "score": 0.7123995987021648, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 + "score": 0.003982293944449671, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.01047222192173988, - "sentence_nr": 12 + "score": 0.0970559370254647, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "bleu", - "score": 0.5683565265173782, - "sentence_nr": 12 + "score": 0.4340730821892422, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ceb", "task": "translation", "metric": "chrf", - "score": 0.7072367582469653, - "sentence_nr": 12 + "score": 0.586288470842118, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 + "score": 0.4865718767050507, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 12 + "score": 0.6519330394001581, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.20287366424876002, - "sentence_nr": 12 + "score": 0.4961186750382622, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.5368464080033196, - "sentence_nr": 12 + "score": 0.6420056154822653, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.5198707241967666, - "sentence_nr": 12 + "score": 0.4047209070683015, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.6993305416237223, - "sentence_nr": 12 + "score": 0.5746231903096143, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.36603776814499195, - "sentence_nr": 12 + "score": 0.4349265118717251, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.45532918164901276, - "sentence_nr": 12 + "score": 0.6182013213050582, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 + "score": 0.32844247965411666, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.13525036115537795, - "sentence_nr": 12 + "score": 0.5261499162036236, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.3120848453730729, - "sentence_nr": 12 + "score": 0.41833670648048593, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.3474347870952493, - "sentence_nr": 12 + "score": 0.5894419890247544, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.7073395735740273, - "sentence_nr": 12 + "score": 0.5093380551269019, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.6577952971578602, - "sentence_nr": 12 + "score": 0.6691377633118654, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.6570128212612868, - "sentence_nr": 12 + "score": 0.4749320079832654, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.6221526807313811, - "sentence_nr": 12 + "score": 0.6616558999065059, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.5460462259563637, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.6641829079106271, - "sentence_nr": 12 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 + "score": 0.46925131548107546, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.04884431803904408, - "sentence_nr": 12 + "score": 0.6524942397625267, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 + "score": 0.21825699659573294, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.18357384275951122, - "sentence_nr": 12 + "score": 0.4251389388937374, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "awa", "task": "translation", "metric": "bleu", - "score": 0.28073304156067924, - "sentence_nr": 12 + "score": 0.22342876267947934, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "awa", "task": "translation", "metric": "chrf", - "score": 0.360657984953223, - "sentence_nr": 12 + "score": 0.46128579490017735, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.46365764298816153, - "sentence_nr": 12 + "score": 0.20522978206415157, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.5757521453586436, - "sentence_nr": 12 + "score": 0.4816367810257562, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.3147715014841853, - "sentence_nr": 12 + "score": 0.2607066928529267, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.5986154863155839, - "sentence_nr": 12 + "score": 0.5190821165076681, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.3885646234110734, - "sentence_nr": 12 + "score": 0.3651499702707945, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.5051669760132699, - "sentence_nr": 12 + "score": 0.539793217489328, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.464413403675355, - "sentence_nr": 12 + "score": 0.32231048454445776, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.6291656356697347, - "sentence_nr": 12 + "score": 0.5391040134655213, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.30490938758882236, - "sentence_nr": 12 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.579088460457721, - "sentence_nr": 12 + "score": 0.1775108912093685, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.3758073513458154, - "sentence_nr": 12 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.5302950018189692, - "sentence_nr": 12 + "score": 0.19086386208696812, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.29308025637967977, - "sentence_nr": 12 + "score": 0.27075549023715834, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.5715200997140051, - "sentence_nr": 12 + "score": 0.5063680799048665, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.43285599641891276, - "sentence_nr": 12 + "score": 0.1518704145788631, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.5551678521355665, - "sentence_nr": 12 + "score": 0.4226430284557875, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.25984882476296983, - "sentence_nr": 12 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.6305744214119023, - "sentence_nr": 12 + "score": 0.17966694002432385, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.48649824146709, - "sentence_nr": 12 + "score": 0.29041968071488244, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.7255446918266525, - "sentence_nr": 12 + "score": 0.5200872448265565, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.464413403675355, - "sentence_nr": 12 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.6853183317800515, - "sentence_nr": 12 + "score": 0.002624671916010499, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", "task": "translation", "metric": "bleu", - "score": 0.34999116613463505, - "sentence_nr": 12 + "score": 0.13211758854099576, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6356075517191035, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.4426623526629488, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", "task": "translation", "metric": "chrf", - "score": 0.612058732370435, - "sentence_nr": 12 + "score": 0.4082004182520704, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.5522004843736675, - "sentence_nr": 12 + "score": 0.4416835863595156, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.6166558670381421, - "sentence_nr": 12 + "score": 0.623117008858419, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.37954187220913477, - "sentence_nr": 12 + "score": 0.4249436481722545, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.5550325994532472, - "sentence_nr": 12 + "score": 0.6187144317500936, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.3147715014841853, - "sentence_nr": 12 + "score": 0.3867569653562107, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.521228891025682, - "sentence_nr": 12 + "score": 0.5709420484876131, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.3964513253420688, - "sentence_nr": 12 + "score": 0.4321751051142018, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.6095420129111676, - "sentence_nr": 12 + "score": 0.627219234526359, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.36033217429111203, - "sentence_nr": 12 + "score": 0.3093560853993581, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.5550014071110869, - "sentence_nr": 12 + "score": 0.5175656995600133, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.33403925633579773, - "sentence_nr": 12 + "score": 0.37381473043548746, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.5753930328058733, - "sentence_nr": 12 + "score": 0.5975179598905689, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.44882520213790794, - "sentence_nr": 12 + "score": 0.46916325232132, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.5856175239899348, - "sentence_nr": 12 + "score": 0.6201105534360691, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.42760828727369016, - "sentence_nr": 12 + "score": 0.4355769714618406, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.6065010489098535, - "sentence_nr": 12 + "score": 0.5931050934922778, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.33403925633579773, - "sentence_nr": 12 + "score": 0.2664145897759877, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.5915394296427854, - "sentence_nr": 12 + "score": 0.4652749975820786, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.3212785834179169, - "sentence_nr": 12 + "score": 0.5404337589009207, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.6158121620368939, - "sentence_nr": 12 + "score": 0.6743788008091396, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.1751489536280261, - "sentence_nr": 12 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.378593296276962, - "sentence_nr": 12 + "score": 0.023087320015249598, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", "task": "translation", "metric": "bleu", - "score": 0.3214110553053944, - "sentence_nr": 12 + "score": 0.48139810095340524, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", "task": "translation", "metric": "chrf", - "score": 0.49232390716994445, - "sentence_nr": 12 + "score": 0.6662472676876138, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.479033905070678, - "sentence_nr": 12 + "score": 0.621898873312397, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.5975149526416976, - "sentence_nr": 12 + "score": 0.7757345897028827, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.13150403915662862, - "sentence_nr": 13 + "score": 0.5494249598159933, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.21177549089429396, - "sentence_nr": 13 + "score": 0.7465246513770903, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.1424915360855107, - "sentence_nr": 13 + "score": 0.5335140114876958, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.23985076149753726, - "sentence_nr": 13 + "score": 0.7053320460577175, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.13309638637723345, - "sentence_nr": 13 + "score": 0.5119388728106423, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.18696197122203645, - "sentence_nr": 13 + "score": 0.7182655499139301, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.12256515595630638, - "sentence_nr": 13 + "score": 0.4899631202302039, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.23303109995893123, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1419886619859991, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.24113733359485448, - "sentence_nr": 13 + "score": 0.7271470388040862, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.1324448705928064, - "sentence_nr": 13 + "score": 0.5895359597121981, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.22863839042697148, - "sentence_nr": 13 + "score": 0.7417056216737207, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.12017886776600228, - "sentence_nr": 13 + "score": 0.634306550586776, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.20794486026487116, - "sentence_nr": 13 + "score": 0.8018062683769096, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.1164257728844972, - "sentence_nr": 13 + "score": 0.6491865905765292, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.19249901344360867, - "sentence_nr": 13 + "score": 0.7886752509790889, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.12325384013681445, - "sentence_nr": 13 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.1960232617116645, - "sentence_nr": 13 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.12806473847444227, - "sentence_nr": 13 + "score": 0.6466026323731634, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.20054688779645718, - "sentence_nr": 13 + "score": 0.813492453726107, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.1345714227066951, - "sentence_nr": 13 + "score": 0.11746179377391347, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.21078968525268058, - "sentence_nr": 13 + "score": 0.40500326360451494, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "bleu", - "score": 0.1802615495980454, - "sentence_nr": 13 + "score": 0.43873222664111144, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ne", "task": "translation", "metric": "chrf", - "score": 0.19630112442374525, - "sentence_nr": 13 + "score": 0.6675706911715913, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.8212614342207556, - "sentence_nr": 13 + "score": 0.1659862741557369, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 0.7876222308170935, - "sentence_nr": 13 + "score": 0.4884440880714965, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.5888582552569348, - "sentence_nr": 13 + "score": 0.40269672228447434, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 0.7876222308170935, - "sentence_nr": 13 + "score": 0.6225404903248234, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.5888582552569348, - "sentence_nr": 13 + "score": 0.3695995811393786, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 0.7876222308170935, - "sentence_nr": 13 + "score": 0.6148303949607244, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.6871546336787117, - "sentence_nr": 13 + "score": 0.4352046882668779, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "score": 0.6480277237944805, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.6871546336787117, - "sentence_nr": 13 + "score": 0.2749090602792788, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "score": 0.5428290107991267, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.43550490048931545, - "sentence_nr": 13 + "score": 0.43090922851400165, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 0.6419345531187637, - "sentence_nr": 13 + "score": 0.6435927083190817, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.17539593635425982, - "sentence_nr": 13 + "score": 0.5515333710683049, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 0.3139104155809725, - "sentence_nr": 13 + "score": 0.7161329488530405, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.39225487001250453, - "sentence_nr": 13 + "score": 0.5034431110377162, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 0.5189967318357492, - "sentence_nr": 13 + "score": 0.7060517244920654, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.12859070457371286, - "sentence_nr": 13 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 0.22162336097079333, - "sentence_nr": 13 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.6871546336787117, - "sentence_nr": 13 + "score": 0.5515333710683049, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "score": 0.7161440924360718, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.6258765997974801, - "sentence_nr": 13 + "score": 0.11143214500495838, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 0.6680248455809015, - "sentence_nr": 13 + "score": 0.4461426742236597, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "mai", "task": "translation", "metric": "bleu", - "score": 0.6258765997974801, - "sentence_nr": 13 + "score": 0.10027952357557399, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "mai", "task": "translation", "metric": "chrf", - "score": 0.6722124517361844, - "sentence_nr": 13 + "score": 0.4397450208902438, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 + "score": 0.2917184142654506, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.17023327167529265, - "sentence_nr": 13 + "score": 0.5624937546502969, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 + "score": 0.2373642291509686, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.2521455524828544, - "sentence_nr": 13 + "score": 0.521644947712484, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.13150403915662862, - "sentence_nr": 13 + "score": 0.2712572779797431, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.2229548791980166, - "sentence_nr": 13 + "score": 0.5422335579149541, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 + "score": 0.3013230432873079, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.15247670030930355, - "sentence_nr": 13 + "score": 0.5337387225243135, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.1324448705928064, - "sentence_nr": 13 + "score": 0.2513195864511859, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.23382021475411732, - "sentence_nr": 13 + "score": 0.5265625099865896, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.09766807787022613, - "sentence_nr": 13 + "score": 0.2870712623899153, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.16788063248730647, - "sentence_nr": 13 + "score": 0.5282814167115877, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.12111615182138995, - "sentence_nr": 13 + "score": 0.3551592549479943, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.21505717177216926, - "sentence_nr": 13 + "score": 0.5657145174510132, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.09979796185764318, - "sentence_nr": 13 + "score": 0.21555439980902089, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.1310501345458609, - "sentence_nr": 13 + "score": 0.4547821375357056, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.11512937599552589, - "sentence_nr": 13 + "score": 0.26704844930269833, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.1852451960926282, - "sentence_nr": 13 + "score": 0.5046361305048248, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.126642985054506, - "sentence_nr": 13 + "score": 0.37842676092304117, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.20913543330915318, - "sentence_nr": 13 + "score": 0.6082386652087882, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.12632059501697884, - "sentence_nr": 13 + "score": 0.2194742003215349, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.22490978846607526, - "sentence_nr": 13 + "score": 0.44824310405856826, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "as", "task": "translation", "metric": "bleu", - "score": 0.1352612651586241, - "sentence_nr": 13 + "score": 0.20824983259105864, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "as", "task": "translation", "metric": "chrf", - "score": 0.22176710342008016, - "sentence_nr": 13 + "score": 0.467721519642842, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 + "score": 0.42143379809685383, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.18982400330057914, - "sentence_nr": 13 + "score": 0.5946121916131629, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.11760179026027952, - "sentence_nr": 13 + "score": 0.4141553414774169, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.19531596229980544, - "sentence_nr": 13 + "score": 0.5906263169622974, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.09968269909242322, - "sentence_nr": 13 + "score": 0.20087168885945464, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.14510210137368384, - "sentence_nr": 13 + "score": 0.38939667381078735, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 + "score": 0.2472558107415106, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.1204925245474865, - "sentence_nr": 13 + "score": 0.48369132925944186, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.12192273449574796, - "sentence_nr": 13 + "score": 0.2741125110896123, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.18177358407861108, - "sentence_nr": 13 + "score": 0.4729834657756108, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 + "score": 0.2209042538614563, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.16841504132177978, - "sentence_nr": 13 + "score": 0.4506672688399074, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.10667790151233097, - "sentence_nr": 13 + "score": 0.4735425104795809, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.17427579502643556, - "sentence_nr": 13 + "score": 0.6471388802003065, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.1508875367739971, - "sentence_nr": 13 + "score": 0.3765436233955492, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.20889434105456664, - "sentence_nr": 13 + "score": 0.5846836525025664, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.13184959768302618, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.30505662513933907, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.09878901581794378, - "sentence_nr": 13 + "score": 0.14346012973809613, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.1651800705978423, - "sentence_nr": 13 + "score": 0.3278530649792867, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.13150403915662862, - "sentence_nr": 13 + "score": 0.44161578383393324, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.20736628090200235, - "sentence_nr": 13 + "score": 0.6271889973227618, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.11824658049755846, - "sentence_nr": 13 + "score": 0.020710959564793303, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.2047497542808756, - "sentence_nr": 13 + "score": 0.17448216344563025, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", "task": "translation", "metric": "bleu", - "score": 0.1461072488843534, - "sentence_nr": 13 + "score": 0.2803509486829134, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", "task": "translation", "metric": "chrf", - "score": 0.1946917085815184, - "sentence_nr": 13 + "score": 0.4797524025621454, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.1018151014848322, - "sentence_nr": 13 + "score": 0.19552775795890473, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.14524830913329922, - "sentence_nr": 13 + "score": 0.3925483761400883, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.2589080403198245, - "sentence_nr": 14 + "score": 0.24101134936111826, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.2189767496390278, - "sentence_nr": 14 + "score": 0.42220180022908466, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 + "score": 0.22800980663874482, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.09761931247072746, - "sentence_nr": 14 + "score": 0.4539695239053247, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.1397102655312677, - "sentence_nr": 14 + "score": 0.21943479669895977, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.1326689502117876, - "sentence_nr": 14 + "score": 0.43919819731535836, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 14 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.167569694983793, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.15848968577272604, - "sentence_nr": 14 + "score": 0.15018919509760256, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.24447662789322752, - "sentence_nr": 14 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.20665940380705064, - "sentence_nr": 14 + "score": 0.1540492458272462, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.18243716955007858, - "sentence_nr": 14 + "score": 0.27133492648358953, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.16168125580314086, - "sentence_nr": 14 + "score": 0.4876773846207858, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.2450013599045987, - "sentence_nr": 14 + "score": 0.2637525786875995, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.20901732384345645, - "sentence_nr": 14 + "score": 0.44971099880168447, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.20222677481313764, - "sentence_nr": 14 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.18492694642397273, - "sentence_nr": 14 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.18243716955007863, - "sentence_nr": 14 + "score": 0.29126430238399986, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.16667457585564618, - "sentence_nr": 14 + "score": 0.48487518930512413, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 + "score": 0.0493406602622155, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.08556679632324991, - "sentence_nr": 14 + "score": 0.23818177576740765, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "bleu", - "score": 0.1575852366903021, - "sentence_nr": 14 + "score": 0.09711337436010993, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "so", "task": "translation", "metric": "chrf", - "score": 0.1474874322154398, - "sentence_nr": 14 + "score": 0.2876823823317057, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 14 + "score": 0.2364242732935431, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "score": 0.5166025885857578, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 0.9202663016973823, - "sentence_nr": 14 + "score": 0.21954964295787202, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 0.9263876898254182, - "sentence_nr": 14 + "score": 0.48436759393641593, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 14 + "score": 0.5054426458074261, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "score": 0.6722694706437392, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 0.8621431910551439, - "sentence_nr": 14 + "score": 0.39300609312334356, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 0.8363304387269249, - "sentence_nr": 14 + "score": 0.6127424804854223, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 0.9419492177147062, - "sentence_nr": 14 + "score": 0.3677920388643988, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 0.9202237383102091, - "sentence_nr": 14 + "score": 0.5936461766226937, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 0.6656058483395763, - "sentence_nr": 14 + "score": 0.38663642984572166, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 0.6306557167105028, - "sentence_nr": 14 + "score": 0.6246274082500232, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 0.8657947138469048, - "sentence_nr": 14 + "score": 0.5306942251812361, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8367521498141209, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9419492177147062, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 0.9202237383102091, - "sentence_nr": 14 + "score": 0.7085606774707288, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 0.6993348038140574, - "sentence_nr": 14 + "score": 0.0, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 0.6335836519040372, - "sentence_nr": 14 + "score": 0.000841750841750842, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "mag", "task": "translation", "metric": "bleu", "score": 0, - "sentence_nr": 14 + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "mag", "task": "translation", "metric": "chrf", "score": 0.0, - "sentence_nr": 14 + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 14 + "score": 0.5173149058064286, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "score": 0.7076921197210724, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 0.9419492177147062, - "sentence_nr": 14 + "score": 0.3348485220754285, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 0.9202237383102091, - "sentence_nr": 14 + "score": 0.5526454143704483, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", "task": "translation", "metric": "bleu", - "score": 0.11064738383914807, - "sentence_nr": 14 + "score": 0.4490860834534268, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", "task": "translation", "metric": "chrf", - "score": 0.12449466772796605, - "sentence_nr": 14 + "score": 0.6673191432059211, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.12222372495044852, - "sentence_nr": 14 + "score": 0.33382920003857136, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.12383047729216191, - "sentence_nr": 14 + "score": 0.5343019280932326, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.1392580908972882, - "sentence_nr": 14 + "score": 0.3885821466849501, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.1333265070823728, - "sentence_nr": 14 + "score": 0.5985448528428169, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.07717159074475938, - "sentence_nr": 14 + "score": 0.35162367832688185, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.09413026539458375, - "sentence_nr": 14 + "score": 0.5470403853789135, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.16807498532991816, - "sentence_nr": 14 + "score": 0.3690613106650631, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.16404257857373192, - "sentence_nr": 14 + "score": 0.5679268631651113, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.21005284223037346, - "sentence_nr": 14 + "score": 0.36458100799846727, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.1679703861465872, - "sentence_nr": 14 + "score": 0.556692655226023, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.0951509584925814, - "sentence_nr": 14 + "score": 0.3922851712831046, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.12014553061064691, - "sentence_nr": 14 + "score": 0.6222668712396683, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.11737915185320068, - "sentence_nr": 14 + "score": 0.3615358594548135, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.10085050674562507, - "sentence_nr": 14 + "score": 0.5764831888443301, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.11377195287577829, - "sentence_nr": 14 + "score": 0.2709924119064872, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.1301681094143453, - "sentence_nr": 14 + "score": 0.4882782050991019, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.09455636771034115, - "sentence_nr": 14 + "score": 0.22089230502491558, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.11463120929696417, - "sentence_nr": 14 + "score": 0.4677450306632732, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.1544787887603271, - "sentence_nr": 14 + "score": 0.3192042127191187, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.1384236976807813, - "sentence_nr": 14 + "score": 0.5199223947442019, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 14 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.11488572123868507, - "sentence_nr": 14 + "score": 0.0015001500150015, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", "task": "translation", "metric": "bleu", - "score": 0.1455973492295447, - "sentence_nr": 14 + "score": 0.39061033693599795, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", "task": "translation", "metric": "chrf", - "score": 0.13735441291745387, - "sentence_nr": 14 + "score": 0.5825589467646037, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.20255423961944058, - "sentence_nr": 14 + "score": 0.25066959615472983, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.205408273869532, - "sentence_nr": 14 + "score": 0.4464863544842361, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.11470196605012067, - "sentence_nr": 14 + "score": 0.24634920227044405, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.0960438892364715, - "sentence_nr": 14 + "score": 0.4663964950094987, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 + "score": 0.2030779777377279, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.07184436307032757, - "sentence_nr": 14 + "score": 0.433265414942881, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.20378989148152887, - "sentence_nr": 14 + "score": 0.18149017327984754, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.16337212771611656, - "sentence_nr": 14 + "score": 0.40703450831310045, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.09669863605676213, - "sentence_nr": 14 + "score": 0.13329022775771593, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.10886215421099144, - "sentence_nr": 14 + "score": 0.3376146076257409, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.18171364159867548, - "sentence_nr": 14 + "score": 0.058929736139753434, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.16245793974098002, - "sentence_nr": 14 + "score": 0.264080458624048, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.07562263205281951, - "sentence_nr": 14 + "score": 0.3173813158730172, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.09819928715831736, - "sentence_nr": 14 + "score": 0.4928463609252142, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.1430606569063152, - "sentence_nr": 14 + "score": 0.305374145776946, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.128073928655324, - "sentence_nr": 14 + "score": 0.510534704480906, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.09526781380423786, - "sentence_nr": 14 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.18223449608285797, - "sentence_nr": 14 + "score": 0.20642594358613336, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.17127401148639734, - "sentence_nr": 14 + "score": 0.4167457729059204, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 14 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.09855718610544388, - "sentence_nr": 14 + "score": 0.010856884256461188, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "si", "task": "translation", "metric": "bleu", - "score": 0.09669863605676213, - "sentence_nr": 14 + "score": 0.23736991525740553, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "si", "task": "translation", "metric": "chrf", - "score": 0.11679541132562438, - "sentence_nr": 14 + "score": 0.41867456253450896, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.09643517424337235, - "sentence_nr": 14 + "score": 0.3969548673353603, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.1226126790254367, - "sentence_nr": 14 + "score": 0.6084494342072353, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.3969253441303859, - "sentence_nr": 15 + "score": 0.44834209038718303, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.43277080710930865, - "sentence_nr": 15 + "score": 0.6192927072328505, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.26887073704667247, - "sentence_nr": 15 + "score": 0.4059702785610718, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.2918476164856665, - "sentence_nr": 15 + "score": 0.5924126044868774, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.5183146371291372, - "sentence_nr": 15 + "score": 0.4845230115211525, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.5942793492554739, - "sentence_nr": 15 + "score": 0.6342072643481442, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 15 + "score": 0.32203015680943314, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.028864519535915668, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13535086012687783, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.29687399422087424, - "sentence_nr": 15 + "score": 0.529821905278818, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.002376388269368755, - "sentence_nr": 15 + "score": 0.22666412589302412, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.04574695485583133, - "sentence_nr": 15 + "score": 0.40566177288010574, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.2703094106380642, - "sentence_nr": 15 + "score": 0.5013254539312615, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.2982249908859, - "sentence_nr": 15 + "score": 0.667586765919732, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.29313061087267483, - "sentence_nr": 15 + "score": 0.449699837581857, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.30295384730328956, - "sentence_nr": 15 + "score": 0.5866210246057594, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.284911205299835, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.32067889250923776, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.29353055611145706, - "sentence_nr": 15 + "score": 0.4570312379742113, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.3381266475327612, - "sentence_nr": 15 + "score": 0.6515364604825435, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.09910529437987022, - "sentence_nr": 15 + "score": 0.06170226864451068, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.2511990291834263, - "sentence_nr": 15 + "score": 0.2910964368746218, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "bleu", - "score": 0.295394335805579, - "sentence_nr": 15 + "score": 0.2303962170230739, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "km", "task": "translation", "metric": "chrf", - "score": 0.362515947701148, - "sentence_nr": 15 + "score": 0.4892587840921042, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.7243776840931383, - "sentence_nr": 15 + "score": 0.4540422742824559, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.8980107630353439, - "sentence_nr": 15 + "score": 0.6751320303512911, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 15 + "score": 0.5339026027654551, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "score": 0.7303050277242, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.9325718821645923, - "sentence_nr": 15 + "score": 0.45382991587984656, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.9490053815176721, - "sentence_nr": 15 + "score": 0.6795124822993059, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.6653044831075519, - "sentence_nr": 15 + "score": 0.454110885283082, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.7986980418662383, - "sentence_nr": 15 + "score": 0.6757624175597331, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.8504591592783618, - "sentence_nr": 15 + "score": 0.4142879154157889, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.8980107630353439, - "sentence_nr": 15 + "score": 0.6227005613083273, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.5950322600507224, - "sentence_nr": 15 + "score": 0.3673143858550097, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.7090542316843602, - "sentence_nr": 15 + "score": 0.5999948756290627, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.44768974737795825, - "sentence_nr": 15 + "score": 0.5933908752486619, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.45520472994232203, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6509298345623671, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.7962234681835563, - "sentence_nr": 15 + "score": 0.7521076687486573, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.41813929088914065, - "sentence_nr": 15 + "score": 0.5389195071775074, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.4779008399806691, - "sentence_nr": 15 + "score": 0.7120805996785787, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.7243776840931383, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.8642805496461259, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.9506885335787997, - "sentence_nr": 15 + "score": 0.5469323345187914, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.9606382935593174, - "sentence_nr": 15 + "score": 0.7053301383823619, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.8665175293126633, - "sentence_nr": 15 + "score": 0.2232040735029205, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.8642805496461259, - "sentence_nr": 15 + "score": 0.5082535342764724, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", "task": "translation", "metric": "bleu", - "score": 0.342569723746894, - "sentence_nr": 15 + "score": 0.2603408634436383, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", "task": "translation", "metric": "chrf", - "score": 0.47156710056973744, - "sentence_nr": 15 + "score": 0.5712310736675958, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.2319934375578505, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.3367678538644817, - "sentence_nr": 15 + "score": 0.1518030911347623, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.2261681529206079, - "sentence_nr": 15 + "score": 0.03947087289497203, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.2647144854968396, - "sentence_nr": 15 + "score": 0.24871772534163297, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.35554722872430145, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.38873710544604445, - "sentence_nr": 15 + "score": 0.15503659808176187, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.3720000272862786, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.44695658930348453, - "sentence_nr": 15 + "score": 0.1587741341654334, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.4118588818865406, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.48573453292579605, - "sentence_nr": 15 + "score": 0.16240427228171622, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.2998354233286452, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.41144215385645566, - "sentence_nr": 15 + "score": 0.15821359701915677, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.42142495511264777, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.49708063531780444, - "sentence_nr": 15 + "score": 0.1999554138760155, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.33296735510279596, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.4176386300927819, - "sentence_nr": 15 + "score": 0.15373049900664998, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.32522259162581857, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.3572499606049779, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.3449668516380805, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.4341194278942322, - "sentence_nr": 15 + "score": 0.15710519562472744, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.36161896085795575, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.5052818563161547, - "sentence_nr": 15 + "score": 0.16203519855642107, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", "task": "translation", "metric": "bleu", - "score": 0.2798191316489921, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", "task": "translation", "metric": "chrf", - "score": 0.31866179281073254, - "sentence_nr": 15 + "score": 0.15948434818675836, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.3170440263520106, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.45327673850268096, - "sentence_nr": 15 + "score": 0.0019920318725099606, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.15538140800156827, - "sentence_nr": 15 + "score": 0.3259608048468566, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.22365453282977818, - "sentence_nr": 15 + "score": 0.546117067949716, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.1352815632479558, - "sentence_nr": 15 + "score": 0.1658740169858733, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.2610624350708668, - "sentence_nr": 15 + "score": 0.4024808935109278, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.35907597395908514, - "sentence_nr": 15 + "score": 0.20156158538354524, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.433310273977633, - "sentence_nr": 15 + "score": 0.4362539345951223, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.33498522957587384, - "sentence_nr": 15 + "score": 0.16957342631054367, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.4529680464694055, - "sentence_nr": 15 + "score": 0.4366938165395205, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.30675389390381064, - "sentence_nr": 15 + "score": 0.139369666953945, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.49190118767827684, - "sentence_nr": 15 + "score": 0.3719644893339326, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.13922661372145656, - "sentence_nr": 15 + "score": 0.312443507311478, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.27553494979330584, - "sentence_nr": 15 + "score": 0.5626555260532313, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.3515170550015674, - "sentence_nr": 15 + "score": 0.280153937179511, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.37881852198491145, - "sentence_nr": 15 + "score": 0.5315975363727116, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.30950829536527374, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.3839157172568008, - "sentence_nr": 15 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.22141947821999777, - "sentence_nr": 15 + "score": 0.25850597630381367, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.3633108862011865, - "sentence_nr": 15 + "score": 0.5200460018701456, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.2957849631521743, - "sentence_nr": 15 + "score": 0.00503842243804861, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.2872269269040579, - "sentence_nr": 15 + "score": 0.07086730383931879, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", "task": "translation", "metric": "bleu", - "score": 0.19474118932727338, - "sentence_nr": 15 + "score": 0.22826305620809492, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", "task": "translation", "metric": "chrf", - "score": 0.3257294949902081, - "sentence_nr": 15 + "score": 0.41816062347701055, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.26505727008662233, - "sentence_nr": 15 + "score": 0.28031528470622435, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.41342120940573923, - "sentence_nr": 15 + "score": 0.5080912630709646, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.5489548889989204, - "sentence_nr": 16 + "score": 0.43161598042102073, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.5292552311493306, - "sentence_nr": 16 + "score": 0.575098943836209, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.43141660874998483, - "sentence_nr": 16 + "score": 0.23322806032691942, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.4251732952639193, - "sentence_nr": 16 + "score": 0.4970157115640211, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.479859141564773, - "sentence_nr": 16 + "score": 0.18831548712362461, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.47978767796651084, - "sentence_nr": 16 + "score": 0.4627923572183501, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.2751349202729036, - "sentence_nr": 16 + "score": 0.2774144264403638, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.311148395820729, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5435154526669127, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.5395341377171525, - "sentence_nr": 16 + "score": 0.4953240887614079, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.5777979902630328, - "sentence_nr": 16 + "score": 0.2542725044959704, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.6331337405946555, - "sentence_nr": 16 + "score": 0.47939306548471916, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.6121338866063298, - "sentence_nr": 16 + "score": 0.5179684763994646, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.6222767269627676, - "sentence_nr": 16 + "score": 0.6594622476886304, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.5440627210252523, - "sentence_nr": 16 + "score": 0.508262827374593, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.5801365308278273, - "sentence_nr": 16 + "score": 0.6484865815175519, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.5097049681318312, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.5622473457673939, - "sentence_nr": 16 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.42567378467735034, - "sentence_nr": 16 + "score": 0.466089579180166, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.470165978205223, - "sentence_nr": 16 + "score": 0.6166427184736345, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.47594607773277786, - "sentence_nr": 16 + "score": 0.0894726048800864, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.5363851621507516, - "sentence_nr": 16 + "score": 0.32040245674130735, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "bleu", - "score": 0.4533373633026252, - "sentence_nr": 16 + "score": 0.30154136324161096, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "kk", "task": "translation", "metric": "chrf", - "score": 0.5042718376547173, - "sentence_nr": 16 + "score": 0.5214624883135771, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 0.8509306641805077, - "sentence_nr": 16 + "score": 0.32434551072073575, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 0.9162670716850285, - "sentence_nr": 16 + "score": 0.5461576315951293, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 16 + "score": 0.417842986003915, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "score": 0.6235945624226917, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 0.9682566771439106, - "sentence_nr": 16 + "score": 0.4684582258807146, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 0.9779127328168863, - "sentence_nr": 16 + "score": 0.6509870807795504, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 0.7040822331405046, - "sentence_nr": 16 + "score": 0.4684582258807146, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 0.7673268835807536, - "sentence_nr": 16 + "score": 0.6509870807795504, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 0.8509306641805077, - "sentence_nr": 16 + "score": 0.2755812609839064, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 0.9162670716850285, - "sentence_nr": 16 + "score": 0.5308706218209139, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 0.7639225615341296, - "sentence_nr": 16 + "score": 0.36065675483488874, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 0.8135226479972402, - "sentence_nr": 16 + "score": 0.5848675866219151, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 0.6729400620282456, - "sentence_nr": 16 + "score": 0.4628530384876385, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7157738382386983, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "score": 0.6179864674944966, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 0.6736973998414632, - "sentence_nr": 16 + "score": 0.4307886337606128, + "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 0.7157738382386983, - "sentence_nr": 16 + "score": 0.6064479768085549, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 0.7640211005075139, - "sentence_nr": 16 + "score": 0.0, + "sentence_nr": 0 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 0.8179683170395244, - "sentence_nr": 16 + "score": 0.09069594593173218, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 16 + "score": 0.4716885638382066, + "sentence_nr": 0 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "score": 0.6714009080674107, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 0.8509306641805077, - "sentence_nr": 16 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 0.9162670716850285, - "sentence_nr": 16 + "score": 0.37155017280771785, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", "task": "translation", "metric": "bleu", - "score": 0.43141660874998483, - "sentence_nr": 16 + "score": 0.4025210047052182, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", "task": "translation", "metric": "chrf", - "score": 0.45005622460103567, - "sentence_nr": 16 + "score": 0.5985194736943847, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.5269212212163125, - "sentence_nr": 16 + "score": 0.37892189586155534, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.5528502361092263, - "sentence_nr": 16 + "score": 0.5908052258359918, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.6736973998414632, - "sentence_nr": 16 + "score": 0.3354557799221337, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.7157738382386983, - "sentence_nr": 16 + "score": 0.5828616357620534, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.33491174038847354, - "sentence_nr": 16 + "score": 0.38540591321276524, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.3646077683106875, - "sentence_nr": 16 + "score": 0.6091815498132347, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.450293182440332, - "sentence_nr": 16 + "score": 0.36977252605532496, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.4822292034174927, - "sentence_nr": 16 + "score": 0.5998039337990848, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.19834633509680927, - "sentence_nr": 16 + "score": 0.3322929293409608, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.2712763621688402, - "sentence_nr": 16 + "score": 0.5804214136040975, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.546749262754264, - "sentence_nr": 16 + "score": 0.32948276574633206, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.5830342194369027, - "sentence_nr": 16 + "score": 0.5579608060488221, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.2754139367364165, - "sentence_nr": 16 + "score": 0.40748672718731094, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.34665831783057166, - "sentence_nr": 16 + "score": 0.6185219751228138, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.42877544777223947, - "sentence_nr": 16 + "score": 0.41391222112524345, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.43803970127356867, - "sentence_nr": 16 + "score": 0.6185000018140083, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.43908893511874636, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.4785460996828672, - "sentence_nr": 16 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.5898466143484524, - "sentence_nr": 16 + "score": 0.37444352680714255, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.6611594562951559, - "sentence_nr": 16 + "score": 0.5777605467565269, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.44701416909786756, - "sentence_nr": 16 + "score": 0.215971435415919, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.5245065297475329, - "sentence_nr": 16 + "score": 0.43504055478394826, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", "task": "translation", "metric": "bleu", - "score": 0.31417347869916407, - "sentence_nr": 16 + "score": 0.36990978753067677, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", "task": "translation", "metric": "chrf", - "score": 0.3530975487930333, - "sentence_nr": 16 + "score": 0.5650271917718249, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.6373258340947424, - "sentence_nr": 16 + "score": 0.2913871477484173, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.6437421244363288, - "sentence_nr": 16 + "score": 0.4778519392615073, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.4715455630189013, - "sentence_nr": 16 + "score": 0.3859000637680225, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.543275675805182, - "sentence_nr": 16 + "score": 0.5858315307170925, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.2807304798995431, - "sentence_nr": 16 + "score": 0.3738636268027588, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.3418543172008782, - "sentence_nr": 16 + "score": 0.5633496484582216, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.5397682182130759, - "sentence_nr": 16 + "score": 0.4084190982281481, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.5703951757357331, - "sentence_nr": 16 + "score": 0.6045916299793137, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.5446420954986508, - "sentence_nr": 16 + "score": 0.23598932454609683, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.5662782206307382, - "sentence_nr": 16 + "score": 0.45933330952484963, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.3378721588486122, - "sentence_nr": 16 + "score": 0.33832642802358825, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.4362453299175689, - "sentence_nr": 16 + "score": 0.5469625730166827, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.49288474585647657, - "sentence_nr": 16 + "score": 0.2906205269998709, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.5578180330951528, - "sentence_nr": 16 + "score": 0.4738555693911765, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.36197274748300795, - "sentence_nr": 16 + "score": 0.28272487839661065, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.36134314178088084, - "sentence_nr": 16 + "score": 0.46645581968232397, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.17060055774694924, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.2566677182784047, - "sentence_nr": 16 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.5717883675148524, - "sentence_nr": 16 + "score": 0.3725338410101549, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.640780099960748, - "sentence_nr": 16 + "score": 0.5606140962762308, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 16 + "score": 0.11947542811972255, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.221071468018936, - "sentence_nr": 16 + "score": 0.38211159102056313, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", "task": "translation", "metric": "bleu", - "score": 0.41620491059292214, - "sentence_nr": 16 + "score": 0.3582495248633888, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", "task": "translation", "metric": "chrf", - "score": 0.4263215396273059, - "sentence_nr": 16 + "score": 0.5337300736591853, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0.3711481893609263, - "sentence_nr": 16 + "score": 0.3850293035436385, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.4101392170618868, - "sentence_nr": 16 + "score": 0.5627108155348461, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 + "score": 0.36775365397595855, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.8813081534414112, - "sentence_nr": 17 + "score": 0.5429838358858414, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0.6486802664285581, - "sentence_nr": 17 + "score": 0.3742787779358338, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.8066891982024211, - "sentence_nr": 17 + "score": 0.5507724077862277, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0.7344798528986015, - "sentence_nr": 17 + "score": 0.36166370485178767, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.8855631322316195, - "sentence_nr": 17 + "score": 0.5544237122855078, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0.6486802664285581, - "sentence_nr": 17 + "score": 0.22502019314147165, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8585894188661937, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.8813081534414112, - "sentence_nr": 17 + "score": 0.4660050287510241, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0.8434569599214109, - "sentence_nr": 17 + "score": 0.2611379210999344, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.9123500588239437, - "sentence_nr": 17 + "score": 0.5199168221681734, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 17 + "score": 0.3179668927459568, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "score": 0.5246182258245259, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0.7849324644314795, - "sentence_nr": 17 + "score": 0.36615448670581846, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.8934780380564308, - "sentence_nr": 17 + "score": 0.5458789775720151, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.8799941663695641, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 17 + "score": 0.4688461566377855, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 17 + "score": 0.6590799487386328, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0.6809354000776107, - "sentence_nr": 17 + "score": 0.2050862226628115, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.8640242853252401, - "sentence_nr": 17 + "score": 0.4479394296115543, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 + "score": 0.362962020143119, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "el", "task": "translation", "metric": "chrf", - "score": 0.8841725044915145, - "sentence_nr": 17 + "score": 0.5507900287805876, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.39503194300684213, - "sentence_nr": 17 + "score": 0.2042633250999265, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.6916289318228928, - "sentence_nr": 17 + "score": 0.4024854380942464, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.3094285625931604, - "sentence_nr": 17 + "score": 0.21682957830342386, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.6328843883953666, - "sentence_nr": 17 + "score": 0.40328066851151617, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.30888995556875376, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.6801864286113619, - "sentence_nr": 17 + "score": 0.17569336234053629, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 17 + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.5512199399393973, - "sentence_nr": 17 + "score": 0.1687808278077641, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.45862256824436665, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.7660160731572102, - "sentence_nr": 17 + "score": 0.17581812142544698, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.47770079267358434, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.8053780976175922, - "sentence_nr": 17 + "score": 0.1795359810750476, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.6259358824502687, - "sentence_nr": 17 + "score": 0.34908859517231833, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.8067950339997761, - "sentence_nr": 17 + "score": 0.5054950855070256, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.5296344689827603, - "sentence_nr": 17 + "score": 0.142431283922237, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.7183083787484315, - "sentence_nr": 17 + "score": 0.3609780899068605, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.7568440125092788, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.8347576899702969, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.3001800600660342, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.6794930944968381, - "sentence_nr": 17 + "score": 0.17861900953769713, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.18879642915927602, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.6584653291380502, - "sentence_nr": 17 + "score": 0.09260326033959126, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "sn", "task": "translation", "metric": "bleu", - "score": 0.4697979053121435, - "sentence_nr": 17 + "score": 0.025515907504753448, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7554660353280213, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3164389365959547, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "sn", "task": "translation", "metric": "chrf", - "score": 0.7121929522648841, - "sentence_nr": 17 + "score": 0.18744523405760177, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.6031798395521694, - "sentence_nr": 17 + "score": 0.13989491400872253, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.7819677495994619, - "sentence_nr": 17 + "score": 0.4095257685037439, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.5646631238098637, - "sentence_nr": 17 + "score": 0.37645276051115606, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.836206348617966, - "sentence_nr": 17 + "score": 0.5980216031532829, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.36615107686578496, - "sentence_nr": 17 + "score": 0.22917125225310467, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.696074520676609, - "sentence_nr": 17 + "score": 0.47903700624901113, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 17 + "score": 0.27318782983287254, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "score": 0.5543770120341358, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.1543252261021413, - "sentence_nr": 17 + "score": 0.17948041177954654, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.4932064977882042, - "sentence_nr": 17 + "score": 0.479796413592652, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.6966863379186454, - "sentence_nr": 17 + "score": 0.06583739589075036, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.7941296295595748, - "sentence_nr": 17 + "score": 0.2886125811836185, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.5487584440377526, - "sentence_nr": 17 + "score": 0.3285631316988268, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.8692797308530646, - "sentence_nr": 17 + "score": 0.5548316779589381, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.8787142254774354, - "sentence_nr": 17 + "score": 0.2696625693704166, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.944457825946867, - "sentence_nr": 17 + "score": 0.4800260750132835, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.5463887965663883, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.7033378749149323, - "sentence_nr": 17 + "score": 0.31125924230690827, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.4912217876159168, - "sentence_nr": 17 + "score": 0.3225412913490767, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.7991339910300419, - "sentence_nr": 17 + "score": 0.5444997824452831, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.7251215108320924, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.8334871013677937, - "sentence_nr": 17 + "score": 0.0013495276653171392, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", "task": "translation", "metric": "bleu", - "score": 0.587725019570444, - "sentence_nr": 17 + "score": 0.15376887027903216, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", "task": "translation", "metric": "chrf", - "score": 0.7957550794048827, - "sentence_nr": 17 + "score": 0.38954501570553435, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.28856268147560865, - "sentence_nr": 17 + "score": 0.02228851669741669, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.6187787024786685, - "sentence_nr": 17 + "score": 0.16941662225476226, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.4402122771181734, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.7716344099519011, - "sentence_nr": 17 + "score": 0.15925566245534395, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.18465966669442654, - "sentence_nr": 17 + "score": 0.020022039661695485, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.503938463452404, - "sentence_nr": 17 + "score": 0.18319796614102749, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.17973438065210462, - "sentence_nr": 17 + "score": 0.25977796098643696, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.5509051817440759, - "sentence_nr": 17 + "score": 0.45846399974797636, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.4809103179432793, - "sentence_nr": 17 + "score": 0.21343334222861665, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.7499547288317748, - "sentence_nr": 17 + "score": 0.4071704087995375, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.6244070585346295, - "sentence_nr": 17 + "score": 0.03859344502039764, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.8433626077474702, - "sentence_nr": 17 + "score": 0.18727863561096483, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.43660156107563336, - "sentence_nr": 17 + "score": 0.33330884243765846, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.7165816705519701, - "sentence_nr": 17 + "score": 0.5553752919921839, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.3748533897614559, - "sentence_nr": 17 + "score": 0.3558905194860598, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.6863935447402433, - "sentence_nr": 17 + "score": 0.5530965003702101, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.3607442374649342, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.6876955247522804, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.3718491333506089, - "sentence_nr": 17 + "score": 0.3629548639887491, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.6941552634040441, - "sentence_nr": 17 + "score": 0.5921544724176951, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.5110976370499285, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.842915559657988, - "sentence_nr": 17 + "score": 0.16686725758165746, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", "task": "translation", "metric": "bleu", - "score": 0.5591535564944223, - "sentence_nr": 17 + "score": 0.027711989596895026, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", "task": "translation", "metric": "chrf", - "score": 0.8079980831297509, - "sentence_nr": 17 + "score": 0.18513440795413078, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "translation", "metric": "bleu", - "score": 0.11809858631445573, - "sentence_nr": 17 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.5943886568930294, - "sentence_nr": 17 + "score": 0.18337542465784618, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "translation", "metric": "bleu", - "score": 0.1423170365140828, - "sentence_nr": 18 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.38605131339325, - "sentence_nr": 18 + "score": 0.2738250966440318, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 18 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.3230989128220882, - "sentence_nr": 18 + "score": 0.17457751379065342, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "bleu", - "score": 0.13860487750886114, - "sentence_nr": 18 + "score": 0.0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.36659667376085786, - "sentence_nr": 18 + "score": 0.16758070996072438, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 18 + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36295227908523897, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13860487750886114, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.36118801210741663, - "sentence_nr": 18 + "score": 0.1648217566450672, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 18 + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.40877861250593944, - "sentence_nr": 18 + "score": 0.14937766642643976, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "bleu", - "score": 0.16673024281943524, - "sentence_nr": 18 + "score": 0.1539372614821037, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.3975048254243706, - "sentence_nr": 18 + "score": 0.39281132309216027, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", "task": "translation", "metric": "bleu", - "score": 0.11262865194228103, - "sentence_nr": 18 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.36030161445252334, - "sentence_nr": 18 + "score": 0.16643025437231165, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "wo", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 18 + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.3066941236048102, - "sentence_nr": 18 + "score": 0.16864925448492676, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 18 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 18 + "score": 0.14310977738931444, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 18 + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.409404483413751, - "sentence_nr": 18 + "score": 0.18336900353303615, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.044083968419034406, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "wo", "task": "translation", "metric": "chrf", - "score": 0.3629681915617596, - "sentence_nr": 18 + "score": 0.1910937333522565, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 18 + "score": 0.2364341169976402, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.4136500403395244, - "sentence_nr": 18 + "score": 0.44688068305416384, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.1909693288724605, - "sentence_nr": 18 + "score": 0.3275794528853699, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.4115524982336727, - "sentence_nr": 18 + "score": 0.490529412998314, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.14192760409508295, - "sentence_nr": 18 + "score": 0.3910557548848884, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.3989311390496819, - "sentence_nr": 18 + "score": 0.5640234702218941, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.20304460086424203, - "sentence_nr": 18 + "score": 0.42567034554082944, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.4966336271433132, - "sentence_nr": 18 + "score": 0.559555145699201, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.35489774041724637, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.3935462418730863, - "sentence_nr": 18 + "score": 0.5417379433047397, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.19928570195743642, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.33523829330170474, - "sentence_nr": 18 + "score": 0.4190699838003337, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.45074421522718355, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.3250861966671464, - "sentence_nr": 18 + "score": 0.611234824040456, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.37895246763927487, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.3051626462022859, - "sentence_nr": 18 + "score": 0.5637705600813122, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.25982820626438285, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.30944349609311117, - "sentence_nr": 18 + "score": 0.448770635195091, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.11556522074454477, - "sentence_nr": 18 + "score": 0.3151111682106363, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.372688132616477, - "sentence_nr": 18 + "score": 0.5484564892322974, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.22392361812003433, - "sentence_nr": 18 + "score": 0.19075628574726866, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.460938469666163, - "sentence_nr": 18 + "score": 0.41027751261677925, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "aeb", "task": "translation", "metric": "bleu", - "score": 0.10704943109718215, - "sentence_nr": 18 + "score": 0.3104800728034583, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "aeb", "task": "translation", "metric": "chrf", - "score": 0.362953271903766, - "sentence_nr": 18 + "score": 0.4915887374711213, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.14392660099814805, - "sentence_nr": 18 + "score": 0.33493420443764327, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.376362134090542, - "sentence_nr": 18 + "score": 0.5494512089523403, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 18 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "score": 0.18258051379187495, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.11718316363212337, - "sentence_nr": 18 + "score": 0.37144982797514564, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.3844506520287143, - "sentence_nr": 18 + "score": 0.5566300649554314, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.37030779414532766, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.4024646900219184, - "sentence_nr": 18 + "score": 0.5686892684950948, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.08197539732074254, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.35287478964221025, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.29047920175774217, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.3502198678697797, - "sentence_nr": 18 + "score": 0.49272880198145935, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.24965028943014114, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.376636825008991, - "sentence_nr": 18 + "score": 0.4673190842234536, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.41178982108320433, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.30372034137078635, - "sentence_nr": 18 + "score": 0.609042705412301, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.21481172921264619, - "sentence_nr": 18 + "score": 0.30730414574085574, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.4009028477501074, - "sentence_nr": 18 + "score": 0.5593126358550683, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.15065778147399764, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.4580508275161034, - "sentence_nr": 18 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.21281360709834968, - "sentence_nr": 18 + "score": 0.30557926483272724, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.4292702902558381, - "sentence_nr": 18 + "score": 0.5372819180558251, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.13780534982274106, - "sentence_nr": 18 + "score": 0.08180491833590103, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.3273034480518148, - "sentence_nr": 18 + "score": 0.311232289473797, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.3063581141268663, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", "task": "translation", "metric": "chrf", - "score": 0.36078900962911326, - "sentence_nr": 18 + "score": 0.49046747666762064, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.2491467453273127, - "sentence_nr": 18 + "score": 0.35425134311526146, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.47986445165634506, - "sentence_nr": 18 + "score": 0.543224327229853, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.10905122148101043, - "sentence_nr": 18 + "score": 0.3446263661390609, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.4502571446121065, - "sentence_nr": 18 + "score": 0.5457062469198075, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.1844277711083256, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.274959074733397, - "sentence_nr": 18 + "score": 0.39231379751700163, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.22458010101026027, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.3607206140473947, - "sentence_nr": 18 + "score": 0.419073948717225, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.17796237395371306, - "sentence_nr": 18 + "score": 0.16129088493851212, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.48209511527864385, - "sentence_nr": 18 + "score": 0.3633785507778129, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.13644487773607678, - "sentence_nr": 18 + "score": 0.0923267971632956, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.36491236604183974, - "sentence_nr": 18 + "score": 0.3309231267378018, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.21850577875478958, - "sentence_nr": 18 + "score": 0.3923760262816128, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.4494281444270959, - "sentence_nr": 18 + "score": 0.5790825048444385, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 + "score": 0.44439671603383196, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.31361999490423276, - "sentence_nr": 18 + "score": 0.6336463934979399, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.1222354265296326, - "sentence_nr": 18 + "score": 0.032197629403649654, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.3727252294250617, - "sentence_nr": 18 + "score": 0.20237585876837602, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.1109484758001971, - "sentence_nr": 18 + "score": 0.3494358555121005, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.3612426584883393, - "sentence_nr": 18 + "score": 0.5521153931296311, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.20356858406857398, - "sentence_nr": 18 + "score": 0.04211115562584218, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.46358366365120834, - "sentence_nr": 18 + "score": 0.21787753877924487, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", "task": "translation", "metric": "bleu", - "score": 0.11530762783711283, - "sentence_nr": 18 + "score": 0.18060798993874613, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", "task": "translation", "metric": "chrf", - "score": 0.3781690117672006, - "sentence_nr": 18 + "score": 0.42347596848174995, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.11907182322580316, - "sentence_nr": 18 + "score": 0.10142265089946709, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.49599003474365394, - "sentence_nr": 18 + "score": 0.23765231683034127, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.4220964985804286, - "sentence_nr": 19 + "score": 0.13326254700682963, + "sentence_nr": 0 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.4455062898838481, - "sentence_nr": 19 + "score": 0.37783345527529155, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.32026140564476524, - "sentence_nr": 19 + "score": 0.0836098993777203, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.4016870075045671, - "sentence_nr": 19 + "score": 0.28681946123560914, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.34697616124581016, - "sentence_nr": 19 + "score": 0.0, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.40373943351486685, - "sentence_nr": 19 + "score": 0.25753830733173966, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 19 + "score": 0.21097146062542602, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4184617303786878, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.4321132548050678, - "sentence_nr": 19 + "score": 0.41556645044647633, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.3499900041521066, - "sentence_nr": 19 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.3822330369569219, - "sentence_nr": 19 + "score": 0.0013785497656465398, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.4220964985804286, - "sentence_nr": 19 + "score": 0.19960018464952517, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.4455062898838481, - "sentence_nr": 19 + "score": 0.40487558185186295, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.41428013900466737, - "sentence_nr": 19 + "score": 0.15308678224580158, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.425713879206717, - "sentence_nr": 19 + "score": 0.3410590522933235, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.4184617303786878, - "sentence_nr": 19 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.4321132548050678, - "sentence_nr": 19 + "score": 0.04145921595437363, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.5102296603076779, - "sentence_nr": 19 + "score": 0.16740774049127094, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.5412065437629714, - "sentence_nr": 19 + "score": 0.36106868107953355, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.32282559495424096, - "sentence_nr": 19 + "score": 0.0, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.38266426308756574, - "sentence_nr": 19 + "score": 0.1642492535825589, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "bleu", - "score": 0.4230074457298372, - "sentence_nr": 19 + "score": 0.054611896822102844, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ti", "task": "translation", "metric": "chrf", - "score": 0.4432451111759523, - "sentence_nr": 19 + "score": 0.21077174268367538, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 + "score": 0.23361580096963977, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "score": 0.4620323651475797, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 0.6363676859401174, - "sentence_nr": 19 + "score": 0.2464000786532921, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 0.6744544901797789, - "sentence_nr": 19 + "score": 0.44961038359873023, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 + "score": 0.2761854595042038, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "score": 0.505027473861755, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 19 + "score": 0.23803104895136312, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 19 + "score": 0.46632697077584034, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 + "score": 0.26526917699436564, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "score": 0.49277477098127986, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 + "score": 0.18465678525703502, + "sentence_nr": 0 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "score": 0.4580829060349368, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 0.9271746317040298, - "sentence_nr": 19 + "score": 0.27522942238370274, + "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 0.9736668125871423, - "sentence_nr": 19 + "score": 0.47639859532742806, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 + "score": 0.2534162743837895, + "sentence_nr": 0 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "score": 0.47327726917113905, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 0.6986939462620247, - "sentence_nr": 19 + "score": 0.2442210345943194, + "sentence_nr": 0 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 0.7821077250864037, - "sentence_nr": 19 + "score": 0.44602068344467516, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 + "score": 0.2650730112906958, + "sentence_nr": 0 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "score": 0.46711748522451396, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 + "score": 0.06394923432983099, + "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "score": 0.32186431939465493, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "be", "task": "translation", "metric": "bleu", - "score": 0.9184678024441792, - "sentence_nr": 19 + "score": 0.2147261886416508, + "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "be", "task": "translation", "metric": "chrf", - "score": 0.8884834862973964, - "sentence_nr": 19 + "score": 0.47511420742329435, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.3797391466432489, - "sentence_nr": 19 + "score": 0.06850339366064954, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.3481158447116987, - "sentence_nr": 19 + "score": 0.2947252945949938, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.31102805827817165, - "sentence_nr": 19 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.3375837027261476, - "sentence_nr": 19 + "score": 0.20218909354463535, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.19710660977672484, - "sentence_nr": 19 + "score": 0.1512699697277094, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.2646181750020499, - "sentence_nr": 19 + "score": 0.3128685016104829, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.3797391466432489, - "sentence_nr": 19 + "score": 0.16920203169238196, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.3274816319655301, - "sentence_nr": 19 + "score": 0.36485146073913816, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.28493958837889694, - "sentence_nr": 19 + "score": 0.21476638434350095, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.35876163607595707, - "sentence_nr": 19 + "score": 0.3754665454783431, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.2485364833746714, - "sentence_nr": 19 + "score": 0.07004290256012695, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.2873862688213756, - "sentence_nr": 19 + "score": 0.2672503731106106, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.41664461891968263, - "sentence_nr": 19 + "score": 0.35506855852882296, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.42600414573009276, - "sentence_nr": 19 + "score": 0.5556495329232688, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.2710684964643971, - "sentence_nr": 19 + "score": 0.21052905851500206, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.2982841390442802, - "sentence_nr": 19 + "score": 0.38934548871055996, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.23005567239800093, - "sentence_nr": 19 + "score": 0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.29184715566281483, - "sentence_nr": 19 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.2741455993358603, - "sentence_nr": 19 + "score": 0.22330682701224286, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.36403543443534025, - "sentence_nr": 19 + "score": 0.4107198616872477, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.34279101776553306, - "sentence_nr": 19 + "score": 0.0, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.42600414573009276, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.30955822779938535, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.39546682876478195, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.39475108115635776, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.42154888635191134, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.2781617026804374, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.32302333182207527, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.08473168573832755, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.25650903369815853, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.2883871807684295, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.21660761852515356, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.25414220830184964, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.32910644083871465, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.29306886812256966, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.18084108219203518, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.27583433958197495, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.25612947694888455, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.3002607987321696, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3216291288446239, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4272249853925079, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.188590266789637, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.26177705380820604, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.3308736026652116, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3875427536757155, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.28432597056103653, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.35944124408933287, - "sentence_nr": 19 + "score": 0.2756948589881712, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", "task": "translation", "metric": "bleu", - "score": 0.23631465024334478, - "sentence_nr": 19 + "score": 0.15653267028380505, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", "task": "translation", "metric": "chrf", - "score": 0.2692006325646732, - "sentence_nr": 19 + "score": 0.3368420125937692, + "sentence_nr": 0 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.259615032947222, - "sentence_nr": 19 + "score": 0.15815751066481462, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.2855780701161316, - "sentence_nr": 19 + "score": 0.5152611872266766, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.07407154448063642, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.31343233007308363, - "sentence_nr": 20 + "score": 0.43145434527321425, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.12903696060775005, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.28662182336952924, - "sentence_nr": 20 + "score": 0.456225988032654, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", @@ -24274,127 +23458,111 @@ "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 20 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.289946670354745, - "sentence_nr": 20 + "score": 0.024459391267874976, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.12351824822447692, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.2585958231966256, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1574562620502688, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2833933092608246, - "sentence_nr": 20 + "score": 0.46822754470803873, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 20 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.2330649391612961, - "sentence_nr": 20 + "score": 0.4031456247133876, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.19194937906573872, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.2858508520944113, - "sentence_nr": 20 + "score": 0.5477665664300843, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.17248469309075373, - "sentence_nr": 20 + "score": 0.20669086265781264, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", + "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.3673041887389201, - "sentence_nr": 20 + "score": 0.5076721272198604, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.17630490037560695, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.28838937143148047, - "sentence_nr": 20 + "score": 0.48116430160978857, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 20 + "score": 0.15611634095633747, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 + "score": 0.5075814499747183, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -24402,191 +23570,191 @@ "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 20 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.25480888745972646, - "sentence_nr": 20 + "score": 0.4122750002638689, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", - "score": 0.14839290005301392, - "sentence_nr": 20 + "score": 0.15412719160788987, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", - "score": 0.29565285341782266, - "sentence_nr": 20 + "score": 0.5010353699512481, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.12369892692249995, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.22266775943086, - "sentence_nr": 20 + "score": 0.44549610902403686, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.10508106635796587, - "sentence_nr": 20 + "score": 0.06647168102389285, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.2504422832248121, - "sentence_nr": 20 + "score": 0.34350832619898364, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.12560672881768975, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.22563365567811913, - "sentence_nr": 20 + "score": 0.4969560260291519, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.12913533075470382, - "sentence_nr": 20 + "score": 0.17077058518804336, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.24776496881674256, - "sentence_nr": 20 + "score": 0.5022008374701596, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.10784756064735967, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.08680476715745516, - "sentence_nr": 20 + "score": 0.4427230465401631, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.06656213940646748, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.22066482174709295, - "sentence_nr": 20 + "score": 0.38435741328258305, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.1694466724647263, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 + "score": 0.4902502031746037, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.175396614619324, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 + "score": 0.49736499605529066, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.12117880855911824, - "sentence_nr": 20 + "score": 0.15154395847232716, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", + "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.32137825349405363, - "sentence_nr": 20 + "score": 0.46053919348995803, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 20 + "score": 0.1609675245202845, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 + "score": 0.5069863833094232, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", @@ -24594,10459 +23762,192051 @@ "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 20 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.20104685618767446, - "sentence_nr": 20 + "score": 0.4041678259311437, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.1290514243115152, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", - "score": 0.25137213099939626, - "sentence_nr": 20 + "score": 0.4766581477336301, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.12422788549118892, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.28372673673489807, - "sentence_nr": 20 + "score": 0.40222210564426, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.164799256779143, - "sentence_nr": 20 + "score": 0.09735981717515908, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.32187376249458133, - "sentence_nr": 20 + "score": 0.35288934658906385, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.08273178236238297, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.2969522070783606, - "sentence_nr": 20 + "score": 0.36399666460809255, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.14440617372843148, - "sentence_nr": 20 + "score": 0.13012870333257068, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.27200704330334224, - "sentence_nr": 20 + "score": 0.3852835519852091, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 20 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.2442053369522631, - "sentence_nr": 20 + "score": 0.3356633416447032, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 20 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.33050427873462274, - "sentence_nr": 20 + "score": 0.28789057461471257, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.2133219421911448, - "sentence_nr": 20 + "score": 0.14040851441890545, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.3424665224706109, - "sentence_nr": 20 + "score": 0.41423464679009114, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 20 + "score": 0.10467757347424328, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.8944054777319608, - "sentence_nr": 20 + "score": 0.36749853206282146, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.0952569581727979, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.24197054442617688, - "sentence_nr": 20 + "score": 0.38264808953110185, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.21682999057776514, - "sentence_nr": 20 + "score": 0.13026649757585426, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.3722897460532404, - "sentence_nr": 20 + "score": 0.41550755035304077, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 20 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.24424323100599224, - "sentence_nr": 20 + "score": 0.039782861678265974, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation", "metric": "bleu", - "score": 0.2205591704292585, - "sentence_nr": 20 + "score": 0.1175904695048123, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", "task": "translation", "metric": "chrf", - "score": 0.3479467223515336, - "sentence_nr": 20 + "score": 0.3996881234028031, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.13714845589364738, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.1926917267834754, - "sentence_nr": 20 + "score": 0.45499281593451946, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.11564012893219777, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.4545444680350158, - "sentence_nr": 20 + "score": 0.44599783682350064, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.12601482779921785, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.17580772500133016, - "sentence_nr": 20 + "score": 0.43595665254608706, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.12022286401047096, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.32957763052496886, - "sentence_nr": 20 + "score": 0.48279986805368713, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.2148084015365523, - "sentence_nr": 20 + "score": 0.15350377490367967, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.40974307981059804, - "sentence_nr": 20 + "score": 0.47645148444499064, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 20 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.29622141199363383, - "sentence_nr": 20 + "score": 0.38785209659947417, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.10565372462613234, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.24146688269469918, - "sentence_nr": 20 + "score": 0.44438099138270787, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.09958408398703665, - "sentence_nr": 20 + "score": 0.12848168928706002, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.22890983822248492, - "sentence_nr": 20 + "score": 0.4421263683867116, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.20795712301883962, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.18629760071299903, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.282761705091657, - "sentence_nr": 20 + "score": 0.4381418376415505, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.09198045184317984, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.2551114536415265, - "sentence_nr": 20 + "score": 0.4598393646838097, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.18112053860965763, - "sentence_nr": 20 + "score": 0.07798530247118374, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.3266298821510716, - "sentence_nr": 20 + "score": 0.4006113700211268, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "translation", "metric": "bleu", - "score": 0.1423412184218882, - "sentence_nr": 20 + "score": 0.14541923959059266, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "translation", "metric": "chrf", - "score": 0.26467729752192487, - "sentence_nr": 20 + "score": 0.47577612932999147, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 + "score": 0.3026566818840519, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.286072901441292, - "sentence_nr": 20 + "score": 0.5945859352092411, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 21 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.2851456053265138, - "sentence_nr": 21 + "score": 0.2521233582161207, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.40959087443621306, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.09858834583812252, - "sentence_nr": 21 + "score": 0.6348509381122925, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.7445389400758123, - "sentence_nr": 21 + "score": 0.40959087443621306, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.9134769668037408, - "sentence_nr": 21 + "score": 0.6348509381122925, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.3182970443542658, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2506297252541463, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "score": 0.5953162569846108, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.8320381765431424, - "sentence_nr": 21 + "score": 0.18816868192268246, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.9129044064886581, - "sentence_nr": 21 + "score": 0.5179253053631742, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 + "score": 0.2354441600194623, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "score": 0.5020320865374484, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.09629060614977814, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.23443139907396643, - "sentence_nr": 21 + "score": 0.43565498999747165, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 + "score": 0.40959087443621306, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "score": 0.6348509381122925, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 21 + "score": 0.28418123342684043, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 21 + "score": 0.539816402671069, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 + "score": 0.3765959322920135, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "score": 0.6295826606382191, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 + "score": 0.40801269202545287, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ar", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "score": 0.6210533025653295, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.07793031063789554, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.29972668857564216, - "sentence_nr": 21 + "score": 0.3700181221537743, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.12409597120849801, - "sentence_nr": 21 + "score": 0.0867932999243575, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.2928237514438983, - "sentence_nr": 21 + "score": 0.4201964133235075, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.15083364266523736, - "sentence_nr": 21 + "score": 0.08214106568089705, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.38662429787924074, - "sentence_nr": 21 + "score": 0.3969463877642616, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.1897299381066278, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.22849324967229787, - "sentence_nr": 21 + "score": 0.5086851537953713, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 21 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.44152236347960977, - "sentence_nr": 21 + "score": 0.3833939462124923, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 21 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.2989569143807341, - "sentence_nr": 21 + "score": 0.2718653389257641, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 21 + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 21 + "score": 0.42323664675852685, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 21 + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 21 + "score": 0.3327209336079636, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.1804000267306113, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.4042166909648807, - "sentence_nr": 21 + "score": 0.451798442226037, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.1777835117834348, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.3423939053207622, - "sentence_nr": 21 + "score": 0.5166806073547074, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 21 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.17611268473423294, - "sentence_nr": 21 + "score": 0.009396473650937872, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.13582906387565688, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "ur", "task": "translation", "metric": "chrf", - "score": 0.20441543914149457, - "sentence_nr": 21 + "score": 0.43344913217266734, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.18928624746011372, - "sentence_nr": 21 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.43639616127375797, - "sentence_nr": 21 + "score": 0.41649654108052436, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.29213008358451265, - "sentence_nr": 21 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.5828788445270403, - "sentence_nr": 21 + "score": 0.3630576975795868, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.14679869139754204, - "sentence_nr": 21 + "score": 0.0744904632040495, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.4021419566569229, - "sentence_nr": 21 + "score": 0.4111163205685468, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.329340597116918, - "sentence_nr": 21 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.6347143291802012, - "sentence_nr": 21 + "score": 0.4363130300030932, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.2868708266227936, - "sentence_nr": 21 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.5779499593492363, - "sentence_nr": 21 + "score": 0.41747276065817185, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.07749370908741021, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.3436610762802303, - "sentence_nr": 21 + "score": 0.3853293582383978, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.2782087319667435, - "sentence_nr": 21 + "score": 0.08950426271691118, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.632418768195088, - "sentence_nr": 21 + "score": 0.419071051019247, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.3083012995502152, - "sentence_nr": 21 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.6392851743718383, - "sentence_nr": 21 + "score": 0.43622390508229153, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.23050898626566632, - "sentence_nr": 21 + "score": 0.13343258247486778, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.48172150010681464, - "sentence_nr": 21 + "score": 0.4018842345370629, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.1969221590285716, - "sentence_nr": 21 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.5644899370701738, - "sentence_nr": 21 + "score": 0.4250905063113662, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.32594818888335836, - "sentence_nr": 21 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.49646222671189383, - "sentence_nr": 21 + "score": 0.36347800793516216, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "translation", "metric": "bleu", - "score": 0.4604008032403599, - "sentence_nr": 21 + "score": 0.06254678076846341, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "translation", "metric": "chrf", - "score": 0.7444026788985108, - "sentence_nr": 21 + "score": 0.3887428577633272, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.08767210132815903, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.29161716271402766, - "sentence_nr": 21 + "score": 0.40476518002703893, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.47302621872495865, - "sentence_nr": 21 + "score": 0.08616711094288851, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.6854823532900025, - "sentence_nr": 21 + "score": 0.3696512763473903, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.12894104034845807, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.3546725638586892, - "sentence_nr": 21 + "score": 0.4486368934849452, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.21468316165048362, - "sentence_nr": 21 + "score": 0.08825252192863794, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.6851126041819388, - "sentence_nr": 21 + "score": 0.4377853721520782, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.250737833894674, - "sentence_nr": 21 + "score": 0.05345137572833361, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.40017617077306594, - "sentence_nr": 21 + "score": 0.3829169125379508, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.05422898988559086, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.27204995504877727, - "sentence_nr": 21 + "score": 0.335890201952113, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 21 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.2743963944428051, - "sentence_nr": 21 + "score": 0.4414911655469702, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.6341922683775969, - "sentence_nr": 21 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.7252122374710612, - "sentence_nr": 21 + "score": 0.34617921188455225, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.12586347848916266, - "sentence_nr": 21 + "score": 0.0588222649477664, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.3554854950683664, - "sentence_nr": 21 + "score": 0.3642771871011383, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.10186730973904586, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.3889045463729729, - "sentence_nr": 21 + "score": 0.43665642120840553, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.20229280648000492, - "sentence_nr": 21 + "score": 0.08248974616169381, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.6194717199605934, - "sentence_nr": 21 + "score": 0.40456777770242314, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 + "score": 0.10496714075880566, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "translation", "metric": "chrf", - "score": 0.31114459650134146, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.11856660123276004, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.34601719602607445, - "sentence_nr": 21 + "score": 0.4262440114275301, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.40072710492884706, - "sentence_nr": 21 + "score": 0.14738500064905094, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.7206046648616748, - "sentence_nr": 21 + "score": 0.4659728395318289, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.22174147515312165, - "sentence_nr": 22 + "score": 0.15386029327005746, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.2117279815687756, - "sentence_nr": 22 + "score": 0.43911482594829104, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.33999170096577974, - "sentence_nr": 22 + "score": 0.10070927557742705, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.29221353951377876, - "sentence_nr": 22 + "score": 0.43718220262892105, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.3058731661111107, - "sentence_nr": 22 + "score": 0.11478463129234825, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.2747352174231836, - "sentence_nr": 22 + "score": 0.4651957501593415, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 22 + "score": 0.07137101582673294, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.42736771185803385, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.39727964545172, - "sentence_nr": 22 + "score": 0.4075406301092705, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.10975022749274138, - "sentence_nr": 22 + "score": 0.0643329477522681, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.13904829787402162, - "sentence_nr": 22 + "score": 0.3960585990192623, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.2873518361947954, - "sentence_nr": 22 + "score": 0.15050382816942576, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.24505805183333226, - "sentence_nr": 22 + "score": 0.4714951011303657, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.33495074569972355, - "sentence_nr": 22 + "score": 0.17247941414020762, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.3454509072842772, - "sentence_nr": 22 + "score": 0.48320144379865687, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.43090467385890824, - "sentence_nr": 22 + "score": 0.057981164297440296, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.3813511699401743, - "sentence_nr": 22 + "score": 0.33896784137459673, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 22 + "score": 0.09751270821852938, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 22 + "score": 0.395617758442078, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.22765977642995502, - "sentence_nr": 22 + "score": 0.06301432444316532, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.2247283208344801, - "sentence_nr": 22 + "score": 0.4249725532507508, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "bleu", - "score": 0.30931906627981315, - "sentence_nr": 22 + "score": 0.18248753930464637, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation", "metric": "chrf", - "score": 0.2527893205238235, - "sentence_nr": 22 + "score": 0.4759830743101189, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "score": 0.3370100422576744, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "score": 0.1946966569103724, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 + "score": 0.0772718393063023, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "score": 0.4203683137304257, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.9210500207490827, - "sentence_nr": 22 + "score": 0.08291357159799752, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.9069369532463243, - "sentence_nr": 22 + "score": 0.4009694996956877, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "score": 0.3714280466838255, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "score": 0.3538602132402044, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 + "score": 0.09026606980896171, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "score": 0.46031801298163716, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.4607778969984477, - "sentence_nr": 22 + "score": 0.167672929900467, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.8103868370118212, - "sentence_nr": 22 + "score": 0.46910779766306765, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "score": 0.32123020755377657, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "score": 0.47435308668900444, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.4885014761119101, - "sentence_nr": 22 + "score": 0.08351211898903935, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.827819363745503, - "sentence_nr": 22 + "score": 0.33030812447506436, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", "task": "translation", "metric": "bleu", - "score": 0.17903870455040152, - "sentence_nr": 22 + "score": 0.07528927678469202, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", "task": "translation", "metric": "chrf", - "score": 0.18440575845606422, - "sentence_nr": 22 + "score": 0.422513417362817, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.1981763713215807, - "sentence_nr": 22 + "score": 0.09084091756463074, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.2520139548059959, - "sentence_nr": 22 + "score": 0.4286741659142759, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.17499310607879404, - "sentence_nr": 22 + "score": 0.06126604215610123, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.18175908515502465, - "sentence_nr": 22 + "score": 0.3837677428398438, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.10089587713517954, - "sentence_nr": 22 + "score": 0.0756907193511249, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.11552870044063634, - "sentence_nr": 22 + "score": 0.4138725093679467, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.3168035112884022, - "sentence_nr": 22 + "score": 0.08866637424249016, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.30580678632835573, - "sentence_nr": 22 + "score": 0.44876462229383973, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.10825039887617824, - "sentence_nr": 22 + "score": 0.10574428430204418, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.1278708456868984, - "sentence_nr": 22 + "score": 0.45371814600333005, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.29705138694670025, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.2780223931578523, - "sentence_nr": 22 + "score": 0.3980589439671235, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.18986262747887736, - "sentence_nr": 22 + "score": 0.15124503767921774, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.18230825914917978, - "sentence_nr": 22 + "score": 0.4874495869756225, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.42442305789888696, - "sentence_nr": 22 + "score": 0.12649672885841734, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.42734795538422576, - "sentence_nr": 22 + "score": 0.41329609863930566, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.18781316135387768, - "sentence_nr": 22 + "score": 0.07465265387221826, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.16808430602651067, - "sentence_nr": 22 + "score": 0.4274000630396105, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.3454156644973841, - "sentence_nr": 22 + "score": 0.1219449069656942, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.30446460704247824, - "sentence_nr": 22 + "score": 0.4776943038671049, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.4479597674250984, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.41132840401983517, - "sentence_nr": 22 + "score": 0.4606409590817001, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", "task": "translation", "metric": "bleu", - "score": 0.10704445941620296, - "sentence_nr": 22 + "score": 0.07996209785853586, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", "task": "translation", "metric": "chrf", - "score": 0.13527356658034445, - "sentence_nr": 22 + "score": 0.4279137012019699, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.09941527806251362, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.13609735884978696, - "sentence_nr": 22 + "score": 0.16764957347186446, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.19230259308735756, - "sentence_nr": 22 + "score": 0.1195053737774238, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.22211286692050705, - "sentence_nr": 22 + "score": 0.4512136289975786, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.20383889880388334, - "sentence_nr": 22 + "score": 0.21748353646757182, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.17813562619757226, - "sentence_nr": 22 + "score": 0.4462746462826943, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.2986551380628858, - "sentence_nr": 22 + "score": 0.24443999371485628, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.30308773908860176, - "sentence_nr": 22 + "score": 0.4991016392840656, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.10536111661637193, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.13679626017050403, - "sentence_nr": 22 + "score": 0.34155562837143877, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.19732230687816163, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.22765162763479738, - "sentence_nr": 22 + "score": 0.1139393935967296, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.3987203877706927, - "sentence_nr": 22 + "score": 0.15066783649768578, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.39992851145514274, - "sentence_nr": 22 + "score": 0.4483285849553733, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 22 + "score": 0.1701935252826955, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.09467800236923245, - "sentence_nr": 22 + "score": 0.4455315745640286, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.11434380596647938, - "sentence_nr": 22 + "score": 0.11941817189528041, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.15034676904545285, - "sentence_nr": 22 + "score": 0.4275071634813637, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.36138016740101575, - "sentence_nr": 22 + "score": 0.08932983819566953, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.31224382417562974, - "sentence_nr": 22 + "score": 0.412238728569517, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.345966570287759, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.2816115803298224, - "sentence_nr": 22 + "score": 0.35698569920901285, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", "task": "translation", "metric": "bleu", - "score": 0.3461146475963348, - "sentence_nr": 22 + "score": 0.1275291133503835, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", "task": "translation", "metric": "chrf", - "score": 0.30131374176129855, - "sentence_nr": 22 + "score": 0.4127884601900206, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.1552102601937674, - "sentence_nr": 22 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.1381803727119777, - "sentence_nr": 22 + "score": 0.4331286519146886, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.4967067363118649, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.6330776418175281, - "sentence_nr": 23 + "score": 0.3538966478758119, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.39501632817024007, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.5629116515332234, - "sentence_nr": 23 + "score": 0.4179644538349004, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.44774758283371513, - "sentence_nr": 23 + "score": 0.11116091368823534, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.6323151453499094, - "sentence_nr": 23 + "score": 0.43307401079748475, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.3353166764160673, - "sentence_nr": 23 + "score": 0.07649978886725356, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.5279751808070301, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3340392563357978, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5542299582982266, - "sentence_nr": 23 + "score": 0.41031664319131844, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.2288355034549531, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.469883747317403, - "sentence_nr": 23 + "score": 0.2626949949898101, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.5472915485853102, - "sentence_nr": 23 + "score": 0.15077540572671325, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.7136367183558585, - "sentence_nr": 23 + "score": 0.43064021519574214, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.6159995640523437, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.8398584608765305, - "sentence_nr": 23 + "score": 0.3795844422349344, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.5155625728615272, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.6435263800797054, - "sentence_nr": 23 + "score": 0.3964061846611735, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 23 + "score": 0.07012053105310272, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 23 + "score": 0.3123716745719453, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 23 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.32206162101132135, - "sentence_nr": 23 + "score": 0.3856352748003268, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "bleu", - "score": 0.24125880497129865, - "sentence_nr": 23 + "score": 0.0876062628502436, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "id", "task": "translation", "metric": "chrf", - "score": 0.47825499190432214, - "sentence_nr": 23 + "score": 0.3978552283854932, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.3292010361291119, - "sentence_nr": 23 + "score": 0.11378204941109882, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.5670300297444607, - "sentence_nr": 23 + "score": 0.4981472095171313, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.46086624699736534, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.6510894943437193, - "sentence_nr": 23 + "score": 0.43759229210123524, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.5511532346688224, - "sentence_nr": 23 + "score": 0.10505106462290037, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.7550305399541021, - "sentence_nr": 23 + "score": 0.4474870048911137, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.34537865578685034, - "sentence_nr": 23 + "score": 0.11534976570369744, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.5956718372193373, - "sentence_nr": 23 + "score": 0.46761329904761845, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.39080227521872696, - "sentence_nr": 23 + "score": 0.0487561532099542, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.621048393466749, - "sentence_nr": 23 + "score": 0.3938156291645021, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.2755396296659942, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.5033588333252278, - "sentence_nr": 23 + "score": 0.4098374118843212, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.5795086255869999, - "sentence_nr": 23 + "score": 0.09697921503203778, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7183582779188291, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6214211316495574, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.7844755306149331, - "sentence_nr": 23 + "score": 0.47076286112111615, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.6008383045972477, - "sentence_nr": 23 + "score": 0.11335203496873462, + "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.7291842011448325, - "sentence_nr": 23 + "score": 0.46528080200591054, + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0, - "sentence_nr": 23 + "sentence_nr": 1 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.0, - "sentence_nr": 23 + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 23 + "score": 0.108829546976023, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.25418196696822093, - "sentence_nr": 23 + "score": 0.4177339268402449, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 23 + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 0.5117784549266909, - "sentence_nr": 23 + "score": 0.008776218574747889, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "de", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 23 + "score": 0.09026606980896171, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "de", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "score": 0.4602880143145438, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.274941620352113, - "sentence_nr": 23 + "score": 0.1059352062327485, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.4651004879148919, - "sentence_nr": 23 + "score": 0.4291550754056065, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.22743363869750483, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.5634710936922129, - "sentence_nr": 23 + "score": 0.4239838444198129, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.518836150464752, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.6242496691584447, - "sentence_nr": 23 + "score": 0.0009218289085545725, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.5989032124636781, - "sentence_nr": 23 + "score": 0.14883746844067872, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.7291306908177887, - "sentence_nr": 23 + "score": 0.4476843235219058, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.2677353447271197, - "sentence_nr": 23 + "score": 0.07999819990926477, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.4460422364967209, - "sentence_nr": 23 + "score": 0.3889987132692464, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.3558785149067877, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.570837784052645, - "sentence_nr": 23 + "score": 0.3916082207331212, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.2624310277292268, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.4915471393606767, - "sentence_nr": 23 + "score": 0.45439447866906496, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.28489318277723963, - "sentence_nr": 23 + "score": 0.09171389226334559, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.6000278331909762, - "sentence_nr": 23 + "score": 0.47356776940569145, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.5728668995816387, - "sentence_nr": 23 + "score": 0.16136315230667173, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.7460634178179616, - "sentence_nr": 23 + "score": 0.48202529715173736, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.46086624699736534, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.6510894943437193, - "sentence_nr": 23 + "score": 0.4750212573397775, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.46507550803536196, - "sentence_nr": 23 + "score": 0.003607064963668313, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.6687857543858925, - "sentence_nr": 23 + "score": 0.09577479457615844, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "ja", "task": "translation", "metric": "bleu", - "score": 0.5155625728615272, - "sentence_nr": 23 + "score": 0.11161133657801552, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "ja", "task": "translation", "metric": "chrf", - "score": 0.6435263800797054, - "sentence_nr": 23 + "score": 0.4277891734340718, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.3639412530979476, - "sentence_nr": 23 + "score": 0.15720527174368754, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.654342605671994, - "sentence_nr": 23 + "score": 0.47882285385622714, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.19882981891203355, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.45714526865696425, - "sentence_nr": 23 + "score": 0.3374178992279451, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.32269274420690436, - "sentence_nr": 23 + "score": 0.15653859793617866, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.49704406859630557, - "sentence_nr": 23 + "score": 0.43177798053127925, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.42849655626964983, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.662646931303495, - "sentence_nr": 23 + "score": 0.3848892678578171, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.3937441173550755, - "sentence_nr": 23 + "score": 0.059281546387121374, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.5600824723479425, - "sentence_nr": 23 + "score": 0.31614571419525433, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.23114663823833642, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.5107406700140826, - "sentence_nr": 23 + "score": 0.2973352934874205, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.419793811546288, - "sentence_nr": 23 + "score": 0.15510829053669334, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.6152785242440109, - "sentence_nr": 23 + "score": 0.4231895807656464, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.46086624699736534, - "sentence_nr": 23 + "score": 0.27560832232663307, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.6510894943437193, - "sentence_nr": 23 + "score": 0.5392779492225674, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.46507550803536196, - "sentence_nr": 23 + "score": 0.09545026362079756, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.6687857543858925, - "sentence_nr": 23 + "score": 0.2895757560105421, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.2296660762967038, - "sentence_nr": 23 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.5259172094145851, - "sentence_nr": 23 + "score": 0.42348732385732035, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.39501632817024007, - "sentence_nr": 23 + "score": 0.08302169728235531, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.5505822266189535, - "sentence_nr": 23 + "score": 0.3447730755591614, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", "task": "translation", "metric": "bleu", - "score": 0.3215000448278979, - "sentence_nr": 23 + "score": 0.12785320519680665, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", "task": "translation", "metric": "chrf", - "score": 0.5947774549102596, - "sentence_nr": 23 + "score": 0.46613459917836336, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.20870371467330825, - "sentence_nr": 23 + "score": 0.0982484177591637, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.40726160697608454, - "sentence_nr": 23 + "score": 0.4109236039282987, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.3460579711860666, - "sentence_nr": 24 + "score": 0.10106439835419144, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.43910565102067395, - "sentence_nr": 24 + "score": 0.44450926478634867, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.16692770661327389, - "sentence_nr": 24 + "score": 0.0891537192318598, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.2940239540182693, - "sentence_nr": 24 + "score": 0.3970634926176537, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.17589867762235817, - "sentence_nr": 24 + "score": 0.07992844954996121, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.2991014535844428, - "sentence_nr": 24 + "score": 0.4400081800535333, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.15568794672327907, - "sentence_nr": 24 + "score": 0.09554681544059333, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.30284457998681635, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17340302865304977, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.28581037214602456, - "sentence_nr": 24 + "score": 0.41422485590617925, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.1782509297990519, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.28710039249342334, - "sentence_nr": 24 + "score": 0.32015903488199987, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.4901491669500622, - "sentence_nr": 24 + "score": 0.06169438305755944, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.5638035394617603, - "sentence_nr": 24 + "score": 0.3642077907264287, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.3460579711860666, - "sentence_nr": 24 + "score": 0.14483568709851755, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.4260473803699743, - "sentence_nr": 24 + "score": 0.45354858647333196, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.2011131382865372, - "sentence_nr": 24 + "score": 0.051823166648868844, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.36314253622836745, - "sentence_nr": 24 + "score": 0.3292846108177459, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.17598839092477797, - "sentence_nr": 24 + "score": 0.10444675051855158, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.28650792027744043, - "sentence_nr": 24 + "score": 0.38166524283468484, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 24 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.15997462319973554, - "sentence_nr": 24 + "score": 0.021139000776129766, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "bleu", - "score": 0.24731742205813823, - "sentence_nr": 24 + "score": 0.11663764605404517, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "mr", "task": "translation", "metric": "chrf", - "score": 0.3980108204104697, - "sentence_nr": 24 + "score": 0.41241788679161784, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.5611872124508993, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.7431443902355421, - "sentence_nr": 24 + "score": 0.16496711525651045, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.4465866985385432, - "sentence_nr": 24 + "score": 0.052359103292999656, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.6260699913485588, - "sentence_nr": 24 + "score": 0.3805982553288677, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.4465866985385432, - "sentence_nr": 24 + "score": 0.0950136506275681, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.6260699913485588, - "sentence_nr": 24 + "score": 0.4372017487229785, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.25509991414681377, - "sentence_nr": 24 + "score": 0.0946260953698702, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.505614827211273, - "sentence_nr": 24 + "score": 0.4353772493110627, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.21452424426866915, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.44780791445343104, - "sentence_nr": 24 + "score": 0.15714515459910894, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.23857086413632697, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.47971483823439903, - "sentence_nr": 24 + "score": 0.11183036824736405, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.446411600799131, - "sentence_nr": 24 + "score": 0.1651856335196525, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.5816697577563045, - "sentence_nr": 24 + "score": 0.4631382298313573, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.4664526119731094, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.6399376431552989, - "sentence_nr": 24 + "score": 0.16543976568828428, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.20156032858716424, - "sentence_nr": 24 + "score": 0.07939562512368398, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.4855075115512445, - "sentence_nr": 24 + "score": 0.34250389101564743, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.1526900266679129, - "sentence_nr": 24 + "score": 0.07545713066088315, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.41716995830580594, - "sentence_nr": 24 + "score": 0.35068174137403757, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 24 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.23259933287371404, - "sentence_nr": 24 + "score": 0.1113696974855524, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "bleu", - "score": 0.20835831728362864, - "sentence_nr": 24 + "score": 0.07652593079250605, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "jv", "task": "translation", "metric": "chrf", - "score": 0.49812931259693377, - "sentence_nr": 24 + "score": 0.3602429629880003, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.17334119484500185, - "sentence_nr": 24 + "score": 0.12506460115047335, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.31463785312250736, - "sentence_nr": 24 + "score": 0.46140175133635725, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.12522096513057643, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.335302418196347, - "sentence_nr": 24 + "score": 0.4404222773455128, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.9100527513271326, - "sentence_nr": 24 + "score": 0.1259356760989446, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.9584484214161733, - "sentence_nr": 24 + "score": 0.44568274520971096, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.20156032858716424, - "sentence_nr": 24 + "score": 0.1643146814613677, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.35007862377558696, - "sentence_nr": 24 + "score": 0.5127730105039489, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.3449632275226908, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.5000457205552167, - "sentence_nr": 24 + "score": 0.41972158348095406, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.15568794672327907, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.30284457998681635, - "sentence_nr": 24 + "score": 0.3866869165486058, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.1529699053146309, - "sentence_nr": 24 + "score": 0.10700354504676883, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.35702516223197556, - "sentence_nr": 24 + "score": 0.48486759891570147, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.15975615838102766, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.16928451900289662, - "sentence_nr": 24 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.40173762794247314, - "sentence_nr": 24 + "score": 0.32485028075459577, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.15568794672327907, - "sentence_nr": 24 + "score": 0.13544894983916997, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.30284457998681635, - "sentence_nr": 24 + "score": 0.4663180024816666, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.16038844415635037, - "sentence_nr": 24 + "score": 0.1289863677885349, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.30359085570641314, - "sentence_nr": 24 + "score": 0.42110704132809784, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.3595283251171754, - "sentence_nr": 24 + "score": 0.09354237835233341, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.5790446318474887, - "sentence_nr": 24 + "score": 0.4409559217991841, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", "task": "translation", "metric": "bleu", - "score": 0.20563705341552085, - "sentence_nr": 24 + "score": 0.12748506711468208, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", "task": "translation", "metric": "chrf", - "score": 0.3762774944524412, - "sentence_nr": 24 + "score": 0.4508210683728834, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.16692770661327389, - "sentence_nr": 24 + "score": 0.10127171102984855, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.2940239540182693, - "sentence_nr": 24 + "score": 0.4525620764847558, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.14165832410287266, - "sentence_nr": 24 + "score": 0.10833971870416897, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.24107149684266257, - "sentence_nr": 24 + "score": 0.4467303749319595, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.1258646065963102, - "sentence_nr": 24 + "score": 0.16322494183480127, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.24857006332411635, - "sentence_nr": 24 + "score": 0.4815584993817062, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.2519649154562495, - "sentence_nr": 24 + "score": 0.08894652425495941, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.44974180175388206, - "sentence_nr": 24 + "score": 0.444906007048383, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.3253958243003269, - "sentence_nr": 24 + "score": 0.1029835796838552, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.45173371737296786, - "sentence_nr": 24 + "score": 0.4404518759673606, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.27618177741751665, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.4305107132988055, - "sentence_nr": 24 + "score": 0.2172505600894225, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.1683625745315614, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.31167225759119427, - "sentence_nr": 24 + "score": 0.4404811365579724, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.13728361101885644, - "sentence_nr": 24 + "score": 0.12826630655689159, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.3436250633828196, - "sentence_nr": 24 + "score": 0.36561922835086, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.16353712933127018, - "sentence_nr": 24 + "score": 0.014482940348354725, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.32934735468962634, - "sentence_nr": 24 + "score": 0.1504461219252398, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.48680589893384085, - "sentence_nr": 24 + "score": 0.11993418633737256, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.6190257724123215, - "sentence_nr": 24 + "score": 0.4771538581125459, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.1551293035275564, - "sentence_nr": 24 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.2674082220133274, - "sentence_nr": 24 + "score": 0.049266699072917926, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 24 + "score": 0.10041064691273172, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", "task": "translation", "metric": "chrf", - "score": 0.26091874007348304, - "sentence_nr": 24 + "score": 0.44193101759002734, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.17598839092477797, - "sentence_nr": 24 + "score": 0.14163299203710986, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.28650792027744043, - "sentence_nr": 24 + "score": 0.3958314877752854, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.1332399603607437, - "sentence_nr": 25 + "score": 0.09463828889338871, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.19971937750838645, - "sentence_nr": 25 + "score": 0.3398200805270262, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.1834283688193615, - "sentence_nr": 25 + "score": 0.0904087252785689, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.22588088032876846, - "sentence_nr": 25 + "score": 0.41830513174690515, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.12425342874478343, - "sentence_nr": 25 + "score": 0.18237761178381828, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.1660533764831914, - "sentence_nr": 25 + "score": 0.4897620961756989, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.13160881951665948, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.15538689193055893, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14158209035366248, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.1869416235999822, - "sentence_nr": 25 + "score": 0.47196475148373473, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 25 + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.0578819658044546, - "sentence_nr": 25 + "score": 0.2854365802731815, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.16170596160446446, - "sentence_nr": 25 + "score": 0.1760489367713912, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.2206817446345091, - "sentence_nr": 25 + "score": 0.45847709984838, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.14914968848461002, - "sentence_nr": 25 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.21702090583674813, - "sentence_nr": 25 + "score": 0.3339687893627504, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 25 + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.138685682297543, - "sentence_nr": 25 + "score": 0.15216414216709395, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 25 + "score": 0.12408616318856698, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 25 + "score": 0.3876257744772486, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 25 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.1258687317121735, - "sentence_nr": 25 + "score": 0.13661459309404012, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.07105699030509427, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "fa", "task": "translation", "metric": "chrf", - "score": 0.1327332961698289, - "sentence_nr": 25 + "score": 0.40480969933909144, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.23556366957615363, - "sentence_nr": 25 + "score": 0.1200100437012302, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.22894370639738668, - "sentence_nr": 25 + "score": 0.4636227306109079, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.16684195647378827, - "sentence_nr": 25 + "score": 0.17208141302168437, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.21420692177337528, - "sentence_nr": 25 + "score": 0.4542522451167506, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.33150414660895594, - "sentence_nr": 25 + "score": 0.21351902664706998, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.30808679013173407, - "sentence_nr": 25 + "score": 0.5130443042033361, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.23556366957615363, - "sentence_nr": 25 + "score": 0.10553179283083523, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.25521078373566897, - "sentence_nr": 25 + "score": 0.4283144779172244, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.14257880024595157, - "sentence_nr": 25 + "score": 0.10814706353513916, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.1979524022915653, - "sentence_nr": 25 + "score": 0.4216597036907072, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.14257880024595157, - "sentence_nr": 25 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.1979524022915653, - "sentence_nr": 25 + "score": 0.38105951101537255, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.260711748598298, - "sentence_nr": 25 + "score": 0.10102972193860417, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.28143225165615565, - "sentence_nr": 25 + "score": 0.4444474600259224, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.25621420675166556, - "sentence_nr": 25 + "score": 0.1785750235950628, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.32613185963061736, - "sentence_nr": 25 + "score": 0.43344212044851604, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.21310996044302127, - "sentence_nr": 25 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", "metric": "chrf", - "score": 0.2620829676028965, - "sentence_nr": 25 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.08892786873926031, - "sentence_nr": 25 + "score": 0.16508680260754793, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.14069122234920528, - "sentence_nr": 25 + "score": 0.4714504120878508, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.12273033502938982, - "sentence_nr": 25 + "score": 0.07862112679032317, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.15070376710164984, - "sentence_nr": 25 + "score": 0.4059503829406287, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "bleu", - "score": 0.17376029392152273, - "sentence_nr": 25 + "score": 0.14400312819313033, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "tr", "task": "translation", "metric": "chrf", - "score": 0.22421987263715565, - "sentence_nr": 25 + "score": 0.43256046228969486, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.06289570792563275, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.07369293827420972, - "sentence_nr": 25 + "score": 0.3813881170279124, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 25 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.08728042965046878, - "sentence_nr": 25 + "score": 0.38763756150559275, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 25 + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.12416744870990627, - "sentence_nr": 25 + "score": 0.16269986423611488, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.11960636789197196, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.06452498627127952, - "sentence_nr": 25 + "score": 0.4498565343058379, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.06530397960697328, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.09758509152849626, - "sentence_nr": 25 + "score": 0.36899545840843095, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.05364480688581678, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.09985298970743903, - "sentence_nr": 25 + "score": 0.3678203159539189, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.22158794642706012, - "sentence_nr": 25 + "score": 0.10060184892331835, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.20787168962643957, - "sentence_nr": 25 + "score": 0.43940648106882807, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.09042147098974282, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.05401240601013853, - "sentence_nr": 25 + "score": 0.45055351363359086, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.07243671671799473, - "sentence_nr": 25 + "score": 0.06758583657764057, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.1543646468773244, - "sentence_nr": 25 + "score": 0.40657395859582235, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.09348998462584433, - "sentence_nr": 25 + "score": 0.15670253601070666, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.130990604448226, - "sentence_nr": 25 + "score": 0.4663740399508032, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.09885362316286796, - "sentence_nr": 25 + "score": 0.08269576405332207, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.15900429623613993, - "sentence_nr": 25 + "score": 0.38185156188383546, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.05014252780397407, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", "task": "translation", "metric": "chrf", - "score": 0.10903227170832805, - "sentence_nr": 25 + "score": 0.38032629067357443, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.11481934989482791, - "sentence_nr": 25 + "score": 0.19045679700622437, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.1745453831609756, - "sentence_nr": 25 + "score": 0.4124342444810736, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 25 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.046916282267844764, - "sentence_nr": 25 + "score": 0.2398247112527542, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.1250076305588977, - "sentence_nr": 25 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.12985392271660248, - "sentence_nr": 25 + "score": 0.06939838145153245, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.1384529882948561, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.06737080019124615, - "sentence_nr": 25 + "score": 0.38277790453523536, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.18629057860741663, - "sentence_nr": 25 + "score": 0.1162066330922535, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.1504281768235603, - "sentence_nr": 25 + "score": 0.3781611496143332, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.07454232971572508, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.11099491388125307, - "sentence_nr": 25 + "score": 0.32060982856396664, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 25 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.1201070010200949, - "sentence_nr": 25 + "score": 0.42583748090002016, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 25 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.08702826664587757, - "sentence_nr": 25 + "score": 0.24120995733605022, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.42262353460370816, - "sentence_nr": 25 + "score": 0.10825804306599494, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.3966051357904673, - "sentence_nr": 25 + "score": 0.3972611038417328, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.15969495416406884, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.09612004569821603, - "sentence_nr": 25 + "score": 0.4493675427485572, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.10249207815381514, - "sentence_nr": 25 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.1341907303110576, - "sentence_nr": 25 + "score": 0.21229683306385236, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "translation", "metric": "bleu", - "score": 0.11635402454082566, - "sentence_nr": 25 + "score": 0.10500492468363652, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "translation", "metric": "chrf", - "score": 0.1636348970852316, - "sentence_nr": 25 + "score": 0.35306452262726606, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 + "score": 0.05275923024775565, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.06028131279303415, - "sentence_nr": 25 + "score": 0.3724723203846839, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.0901676620993871, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6983671476675032, - "sentence_nr": 26 + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.6697193437120026, - "sentence_nr": 26 + "score": 0.3544628606759813, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.5809024483660724, - "sentence_nr": 26 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.5409616569206442, - "sentence_nr": 26 + "score": 0.3371547585108182, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.5893051076561628, - "sentence_nr": 26 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.555242666304663, - "sentence_nr": 26 + "score": 0.347335662744532, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.5197038614969076, - "sentence_nr": 26 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4944106522194635, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5863087308455573, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.5756247354842696, - "sentence_nr": 26 + "score": 0.3436708646772823, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.44763438063632005, - "sentence_nr": 26 + "score": 0.05182797087573874, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.4327706284829231, - "sentence_nr": 26 + "score": 0.3356084649197975, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.4562933372999328, - "sentence_nr": 26 + "score": 0.0709399674988252, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.4354000091116894, - "sentence_nr": 26 + "score": 0.4066552319349635, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.650945489442927, - "sentence_nr": 26 + "score": 0.08964131615841985, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.6025447507087655, - "sentence_nr": 26 + "score": 0.3962711438859162, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.5040260890269513, - "sentence_nr": 26 + "score": 0.05980107027395768, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.48159079549233025, - "sentence_nr": 26 + "score": 0.3921687449014443, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.3966338449810425, - "sentence_nr": 26 + "score": 0.0842991091574967, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.3940867714969907, - "sentence_nr": 26 + "score": 0.37839472970450666, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.3186669369694382, - "sentence_nr": 26 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.34867169182256896, - "sentence_nr": 26 + "score": 0.021188116207401797, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "bleu", - "score": 0.6350785093832516, - "sentence_nr": 26 + "score": 0.059737095980317775, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "it", "task": "translation", "metric": "chrf", - "score": 0.6188888500556722, - "sentence_nr": 26 + "score": 0.33742048813623593, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 + "score": 0.12650809806003369, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "score": 0.4579202271851988, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 + "score": 0.09676230489828269, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "score": 0.43266369498706486, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 + "score": 0.1691386174483793, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "score": 0.4920789340026317, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 + "score": 0.26337200877742073, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "score": 0.5332559901749826, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 + "score": 0.12820355595850366, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "score": 0.3621325951848801, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "score": 0.27031481031630283, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.7997394936755756, - "sentence_nr": 26 + "score": 0.19217857276238626, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.7811228513409922, - "sentence_nr": 26 + "score": 0.49145817895698257, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.9660854289024723, - "sentence_nr": 26 + "score": 0.1348878985611687, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.9613867167137871, - "sentence_nr": 26 + "score": 0.4519788039621858, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.7158159753911548, - "sentence_nr": 26 + "score": 0.05350911980603496, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.7127947486849641, - "sentence_nr": 26 + "score": 0.3720359854835493, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 0.6813410498464633, - "sentence_nr": 26 + "score": 0.11248337299167142, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 0.6671821168913319, - "sentence_nr": 26 + "score": 0.47408452973613896, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "score": 0.03735667108797313, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 + "score": 0.15907551162629324, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "fil", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "score": 0.4524235916096891, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.45066539224706753, - "sentence_nr": 26 + "score": 0.19135523280427486, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.4254592023616511, - "sentence_nr": 26 + "score": 0.49947805136320467, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.45779216736532874, - "sentence_nr": 26 + "score": 0.2356661678654945, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.40945502186629257, - "sentence_nr": 26 + "score": 0.5124350706386419, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.40071581088356767, - "sentence_nr": 26 + "score": 0.14944432524273302, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.36844216279073794, - "sentence_nr": 26 + "score": 0.4972796478830659, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.14609848125563302, - "sentence_nr": 26 + "score": 0.2128223810599462, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.18504017619904287, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4184317523303411, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.40500270963162277, - "sentence_nr": 26 + "score": 0.5048999729332083, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.4125433652059801, - "sentence_nr": 26 + "score": 0.2159761642923436, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.3955923992862865, - "sentence_nr": 26 + "score": 0.5016920563570307, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.47182538941865537, - "sentence_nr": 26 + "score": 0.20689377284100188, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.42450279333172475, - "sentence_nr": 26 + "score": 0.499181874773421, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.46492333059956836, - "sentence_nr": 26 + "score": 0.15954322482017244, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.4401112788616263, - "sentence_nr": 26 + "score": 0.4866743072979551, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.3967795858478363, - "sentence_nr": 26 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.3803134453035716, - "sentence_nr": 26 + "score": 0.4522625000672462, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.34915707707242977, - "sentence_nr": 26 + "score": 0.005506479027884721, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.34988691421168616, - "sentence_nr": 26 + "score": 0.12804576923274616, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.2613611691981996, - "sentence_nr": 26 + "score": 0.231679638185573, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.2740054517113319, - "sentence_nr": 26 + "score": 0.5128772172047342, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.5600863252474344, - "sentence_nr": 26 + "score": 0.2128223810599462, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.5179797138258272, - "sentence_nr": 26 + "score": 0.5048999729332083, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", "task": "translation", "metric": "bleu", - "score": 0.3461243385522883, - "sentence_nr": 26 + "score": 0.11469202308276233, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", "task": "translation", "metric": "chrf", - "score": 0.3560268535895035, - "sentence_nr": 26 + "score": 0.47736356509677796, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.43650008892828823, - "sentence_nr": 26 + "score": 0.10588612806056373, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.42551924250056755, - "sentence_nr": 26 + "score": 0.4068718481729766, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.504580863725975, - "sentence_nr": 26 + "score": 0.10720391954020723, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.46703102558879955, - "sentence_nr": 26 + "score": 0.37219605281253065, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.36954961729302616, - "sentence_nr": 26 + "score": 0.09793316925795417, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.34760122558190465, - "sentence_nr": 26 + "score": 0.4297577431879659, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.3803026331533805, - "sentence_nr": 26 + "score": 0.10461993210426317, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.362200056491149, - "sentence_nr": 26 + "score": 0.411539574168363, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.24777987943516128, - "sentence_nr": 26 + "score": 0.09232875412879928, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.2952194113831596, - "sentence_nr": 26 + "score": 0.40574729737089493, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.5258092834799059, - "sentence_nr": 26 + "score": 0.054115495307563, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.4981801549352249, - "sentence_nr": 26 + "score": 0.3261514049985403, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.48625052891235754, - "sentence_nr": 26 + "score": 0.13894512516215204, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.4290939038872796, - "sentence_nr": 26 + "score": 0.45252610009573474, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.4045007320789693, - "sentence_nr": 26 + "score": 0.06986280403265237, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.4098113348256027, - "sentence_nr": 26 + "score": 0.35513587606181224, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.44158642009003995, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.40903259597127894, - "sentence_nr": 26 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.4946406341236379, - "sentence_nr": 26 + "score": 0.11511385959745848, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.4719975064311173, - "sentence_nr": 26 + "score": 0.43904137765866535, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.41182432358851845, - "sentence_nr": 26 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.4034715718148006, - "sentence_nr": 26 + "score": 0.24802563498261762, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", "task": "translation", "metric": "bleu", - "score": 0.3693186725771347, - "sentence_nr": 26 + "score": 0.15017237887090715, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", "task": "translation", "metric": "chrf", - "score": 0.36304188784855995, - "sentence_nr": 26 + "score": 0.4481403580546466, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.3692675983091899, - "sentence_nr": 26 + "score": 0.0967458811247473, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.37402683054534963, - "sentence_nr": 26 + "score": 0.4485783191522753, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.7645786047678913, - "sentence_nr": 27 + "score": 0.06851723496815999, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.8655501219338723, - "sentence_nr": 27 + "score": 0.40911149660575097, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.8643729226327672, - "sentence_nr": 27 + "score": 0.14184998906630783, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.9215030582508996, - "sentence_nr": 27 + "score": 0.44498159653494584, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.8643729226327672, - "sentence_nr": 27 + "score": 0.098684745093626, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.9215030582508996, - "sentence_nr": 27 + "score": 0.4007829842063641, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.7858164289172753, - "sentence_nr": 27 + "score": 0.12459539355692184, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8872272977237059, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8643729226327672, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.9215030582508996, - "sentence_nr": 27 + "score": 0.4660673682008178, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.6898913050782208, - "sentence_nr": 27 + "score": 0.05089649055811939, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.8620687741940413, - "sentence_nr": 27 + "score": 0.332316583093035, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.6898913050782208, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.8528837782425732, - "sentence_nr": 27 + "score": 0.4226166554364405, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.7708719635370461, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.8793197587693242, - "sentence_nr": 27 + "score": 0.4257191895245898, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.7708719635370461, - "sentence_nr": 27 + "score": 0.08255714494862634, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.888538633093067, - "sentence_nr": 27 + "score": 0.4304002627139641, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.6840689169974626, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.8314419144081646, - "sentence_nr": 27 + "score": 0.4447505575009147, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.5819799380263497, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.7407958979814505, - "sentence_nr": 27 + "score": 0.34130342683703757, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "bleu", - "score": 0.7645786047678913, - "sentence_nr": 27 + "score": 0.09309060319054646, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "th", "task": "translation", "metric": "chrf", - "score": 0.8655501219338723, - "sentence_nr": 27 + "score": 0.4298405489249517, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.41098733201100757, - "sentence_nr": 27 + "score": 0.13308739447486365, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.651283133493195, - "sentence_nr": 27 + "score": 0.3932447622969156, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.6152755816095169, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.7669297251133314, - "sentence_nr": 27 + "score": 0.36741937011390374, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.4250002996145258, - "sentence_nr": 27 + "score": 0.11220450894323894, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.6670552714553488, - "sentence_nr": 27 + "score": 0.46129962837218175, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.3735617779670567, - "sentence_nr": 27 + "score": 0.1495364895327543, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.5773479111816255, - "sentence_nr": 27 + "score": 0.47363863746713725, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.5543498698280007, - "sentence_nr": 27 + "score": 0.09996446612314541, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.7266847297604082, - "sentence_nr": 27 + "score": 0.38170822021413087, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.3716332023564544, - "sentence_nr": 27 + "score": 0.04590350535783419, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.6132388888021502, - "sentence_nr": 27 + "score": 0.3590907949514895, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.6986939462620247, - "sentence_nr": 27 + "score": 0.07468620780110702, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.8497711598086016, - "sentence_nr": 27 + "score": 0.3836013751816852, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.5072570733389083, - "sentence_nr": 27 + "score": 0.10539949748136965, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.7124868368374351, - "sentence_nr": 27 + "score": 0.43372525820935726, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.5907596734005102, - "sentence_nr": 27 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.7837270250239556, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.10008881112800158, - "sentence_nr": 27 + "score": 0.08313078148769443, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.29125356488795046, - "sentence_nr": 27 + "score": 0.37751928853187794, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "bleu", "score": 0.0, - "sentence_nr": 27 + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.041649157343430596, - "sentence_nr": 27 + "score": 0.0017825311942959, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "bleu", - "score": 0.6587480145435196, - "sentence_nr": 27 + "score": 0.060825626903085836, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "kn", "task": "translation", "metric": "chrf", - "score": 0.7917841426705801, - "sentence_nr": 27 + "score": 0.367772007695087, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.7446828000198126, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.885521980076414, - "sentence_nr": 27 + "score": 0.3645369664653625, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.6466833757622275, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.7737914417145209, - "sentence_nr": 27 + "score": 0.3275292968031138, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.4447278656331358, - "sentence_nr": 27 + "score": 0.08932983819566953, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.6742569711624775, - "sentence_nr": 27 + "score": 0.37462132890676997, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.600047216971444, - "sentence_nr": 27 + "score": 0.12151683896637884, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.7511423755179258, - "sentence_nr": 27 + "score": 0.3874631848880938, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 27 + "score": 0.10204941450542204, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "score": 0.3642762802151916, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.3382340617900419, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.6182585373365673, - "sentence_nr": 27 + "score": 0.2954316287998063, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.6069548573053054, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.7630436854704967, - "sentence_nr": 27 + "score": 0.38351345508808277, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.40482952759410495, - "sentence_nr": 27 + "score": 0.08121271060180286, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.6241130944295542, - "sentence_nr": 27 + "score": 0.38347124715279823, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.5021718181363274, - "sentence_nr": 27 + "score": 0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.697189669759932, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.7858164289172753, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.8717639062922423, - "sentence_nr": 27 + "score": 0.346134163535414, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.5731680012014568, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.746935173521359, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7224037170215811, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.8452672523905139, - "sentence_nr": 27 + "score": 0.25122591039975606, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", "task": "translation", "metric": "bleu", - "score": 0.5724496367057007, - "sentence_nr": 27 + "score": 0.142144689462689, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", "task": "translation", "metric": "chrf", - "score": 0.7350859720106757, - "sentence_nr": 27 + "score": 0.4331731101712559, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.42250552136302394, - "sentence_nr": 27 + "score": 0.15453746478246141, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.6425389837629188, - "sentence_nr": 27 + "score": 0.4413516563123831, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.7645048342610411, - "sentence_nr": 27 + "score": 0.1383193561213217, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.876234192352485, - "sentence_nr": 27 + "score": 0.4229717720106369, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.45751787171307623, - "sentence_nr": 27 + "score": 0.14846392828893068, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.6647794363792763, - "sentence_nr": 27 + "score": 0.44939103256256696, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.7623067286250759, - "sentence_nr": 27 + "score": 0.12146424147064877, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.8682092620191191, - "sentence_nr": 27 + "score": 0.4236242053572171, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.46189821859121283, - "sentence_nr": 27 + "score": 0.1094074965643601, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.6442319235751083, - "sentence_nr": 27 + "score": 0.40564547968508147, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.3931991982536581, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.6422735790483707, - "sentence_nr": 27 + "score": 0.21262860902391906, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.44644290381704027, - "sentence_nr": 27 + "score": 0.25472503432861054, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.6892051604181435, - "sentence_nr": 27 + "score": 0.49230982416428504, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.4000177797533498, - "sentence_nr": 27 + "score": 0.13364464646895982, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.645169701736652, - "sentence_nr": 27 + "score": 0.420485716773103, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.4479818542603719, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.6761961025641056, - "sentence_nr": 27 + "score": 0.014058355159408403, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.7123871749204508, - "sentence_nr": 27 + "score": 0.10642944544652122, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.8331784519293958, - "sentence_nr": 27 + "score": 0.4272539643561774, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.5749089871602278, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.7211428196508521, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "or", "task": "translation", "metric": "bleu", - "score": 0.38506289173931413, - "sentence_nr": 27 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "or", "task": "translation", "metric": "chrf", - "score": 0.6152360906748179, - "sentence_nr": 27 + "score": 0.11665236403515139, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.6231488481063673, - "sentence_nr": 27 + "score": 0.10713148568717314, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.7734960210241439, - "sentence_nr": 27 + "score": 0.41522111700393083, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 + "score": 0.3539070801331386, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 + "score": 0.11340129142744679, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 + "score": 0.4168800407013454, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 + "score": 0.07438681343481453, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.693261298341864, - "sentence_nr": 28 + "score": 0.3894532190798538, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 + "score": 0.12289012856297825, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.693261298341864, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6745016003476486, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.8511670783317596, - "sentence_nr": 28 + "score": 0.4301477375362509, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.32329508170352383, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.6141330847741713, - "sentence_nr": 28 + "score": 0.1585163492096374, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 + "score": 0.43788019223348373, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.3837983925863447, - "sentence_nr": 28 + "score": 0.11765941642483725, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.6366757448341102, - "sentence_nr": 28 + "score": 0.44984502263523063, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.6745016003476486, - "sentence_nr": 28 + "score": 0.12108876184031253, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.8511670783317596, - "sentence_nr": 28 + "score": 0.3771925448240792, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 28 + "score": 0.3814511364616612, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.6667025833042813, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.746973053424487, - "sentence_nr": 28 + "score": 0.2862195367689212, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "bleu", - "score": 0.6745016003476486, - "sentence_nr": 28 + "score": 0.08824413655138029, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "pl", "task": "translation", "metric": "chrf", - "score": 0.8511670783317596, - "sentence_nr": 28 + "score": 0.3977541835364748, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.217554942150074, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.4859163400220353, - "sentence_nr": 28 + "score": 0.3702987017023586, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.3008656294855478, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.5198655773563042, - "sentence_nr": 28 + "score": 0.3241317524160092, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.3008656294855478, - "sentence_nr": 28 + "score": 0.07351652222518425, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.5198655773563042, - "sentence_nr": 28 + "score": 0.3862617013651048, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.4965705242699611, - "sentence_nr": 28 + "score": 0.38224927613981324, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.32079058840140134, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.5094305382960898, - "sentence_nr": 28 + "score": 0.37754295227618245, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.4968400811224627, - "sentence_nr": 28 + "score": 0.17228284869075539, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.32079058840140134, - "sentence_nr": 28 + "score": 0.10455435536860881, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.5094305382960898, - "sentence_nr": 28 + "score": 0.41817390114586295, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.35479105265934485, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.4725761870926308, - "sentence_nr": 28 + "score": 0.2594621783720232, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.3301899334885226, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.5632801217523468, - "sentence_nr": 28 + "score": 0.1974952222966699, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 28 + "score": 0.37756676543137707, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.1923904871441659, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.5825915593253297, - "sentence_nr": 28 + "score": 0.19258818005939538, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "bleu", - "score": 0.32079058840140134, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "ha", "task": "translation", "metric": "chrf", - "score": 0.5094305382960898, - "sentence_nr": 28 + "score": 0.3162255423673242, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.4892199210635081, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.6263002679299042, - "sentence_nr": 28 + "score": 0.30718853768673293, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.09147827112247602, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.3360691966057836, - "sentence_nr": 28 + "score": 0.3499024158832446, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.2966218714191134, - "sentence_nr": 28 + "score": 0.12587301409115934, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.5348497180679597, - "sentence_nr": 28 + "score": 0.43278573034203477, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.3008656294855478, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.5209701084013916, - "sentence_nr": 28 + "score": 0.08072859763900794, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.3254074668234594, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.540582703782851, - "sentence_nr": 28 + "score": 0.3683463348059566, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.22935466869603194, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.6357138961264384, - "sentence_nr": 28 + "score": 0.16140904075314855, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 + "score": 0.09640029388493841, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 + "score": 0.44383188407096436, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.38769943713308697, - "sentence_nr": 28 + "score": 0.09092206673648158, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.6179897670313796, - "sentence_nr": 28 + "score": 0.4200146131210127, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.35964066074252593, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.5418421848087059, - "sentence_nr": 28 + "score": 0.27627389434334787, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.31666472263798334, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.5096984883597744, - "sentence_nr": 28 + "score": 0.3701108638788564, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.2656621439255861, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.47187800221660153, - "sentence_nr": 28 + "score": 0.10823255315380634, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", "task": "translation", "metric": "bleu", - "score": 0.41583634222861793, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", "task": "translation", "metric": "chrf", - "score": 0.6558319092753532, - "sentence_nr": 28 + "score": 0.10849792605943348, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.26633048164380024, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.5038200170930055, - "sentence_nr": 28 + "score": 0.4312123024580457, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.5371525807924681, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.7677378485184402, - "sentence_nr": 28 + "score": 0.24946780875926136, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.15274299622833287, - "sentence_nr": 28 + "score": 0.06500924965575555, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.4692950277268683, - "sentence_nr": 28 + "score": 0.389301118498321, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.30626101600123445, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.583891679561264, - "sentence_nr": 28 + "score": 0.42962123952100073, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.18137691349228668, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4586072719105437, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.23443677523946913, - "sentence_nr": 28 + "score": 0.08380035569969414, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.5163278972706644, - "sentence_nr": 28 + "score": 0.41608423823971435, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.33876931708826047, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.550413577565279, - "sentence_nr": 28 + "score": 0.3682635438782073, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.14207405313947058, - "sentence_nr": 28 + "score": 0.1643756453595719, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.47874702297210975, - "sentence_nr": 28 + "score": 0.5131518108984869, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.2539342198718324, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.46375067718601715, - "sentence_nr": 28 + "score": 0.41902495095742714, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.200726550812963, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.41645295439394076, - "sentence_nr": 28 + "score": 0.3645487079754606, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.42995245074388394, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.6515566568079457, - "sentence_nr": 28 + "score": 0.3500022174766425, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.2834052290575623, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.4974109921343301, - "sentence_nr": 28 + "score": 0.3013590931650816, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", "task": "translation", "metric": "bleu", - "score": 0.19454290935168927, - "sentence_nr": 28 + "score": 0.11220450894323894, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", "task": "translation", "metric": "chrf", - "score": 0.49909763892228687, - "sentence_nr": 28 + "score": 0.4103718597593798, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.3837983925863447, - "sentence_nr": 28 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.6379993550810827, - "sentence_nr": 28 + "score": 0.29873361351172023, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.1481394578697113, - "sentence_nr": 29 + "score": 0.08186981924084771, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.30063818852404856, - "sentence_nr": 29 + "score": 0.36422083962860535, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 29 + "score": 0.07752927781917028, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "score": 0.3238609427019678, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 29 + "score": 0.10183633383410681, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "score": 0.3805172880929802, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.14216645907653844, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2737034564138708, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 + "score": 0.2704106810852134, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 + "score": 0.05255579792886986, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 + "score": 0.4115265273644293, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.14939354788683526, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.29041654772860626, - "sentence_nr": 29 + "score": 0.33709419207258606, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 + "score": 0.12459211652309463, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0, - "sentence_nr": 29 + "score": 0.07774648652101643, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.0, - "sentence_nr": 29 + "score": 0.38100957871754465, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 1.0, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 + "score": 0.07860263587184375, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "my", "task": "translation", "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 + "score": 0.37524253175141375, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 + "score": 0.3092395616495983, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.5420662441541858, - "sentence_nr": 29 + "score": 0.0811151580341062, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.5445089463670787, - "sentence_nr": 29 + "score": 0.4179228886149028, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.40919282596076484, - "sentence_nr": 29 + "score": 0.04318453178079916, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.5542936932152527, - "sentence_nr": 29 + "score": 0.3381884955798567, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 + "score": 0.18025686784380132, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 + "score": 0.25002690670423616, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.5928902071159559, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.647817438132439, - "sentence_nr": 29 + "score": 0.015512263616295723, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.5928902071159559, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.647817438132439, - "sentence_nr": 29 + "score": 0.43293305745010263, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 + "score": 0.32529077705920345, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.34641959937802264, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.47549559716182727, - "sentence_nr": 29 + "score": 0.3897147415993498, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "am", "task": "translation", "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 + "score": 0.2801941853357009, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.42461633178803443, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.5603699277937889, - "sentence_nr": 29 + "score": 0.15333726274185422, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 + "score": 0.14176967102285878, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.2340216139262901, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.4522093023662336, - "sentence_nr": 29 + "score": 0.15856726741880453, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.4132352454218328, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.5544725906870476, - "sentence_nr": 29 + "score": 0.2830740020655188, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 + "score": 0.1386688771726747, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.3951500216160541, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.6089660957340174, - "sentence_nr": 29 + "score": 0.1303104091598296, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 + "score": 0.11487251192182539, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 + "score": 0.3838034014383599, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.42282359171428024, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.5395092365663595, - "sentence_nr": 29 + "score": 0.2758428491606746, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.35412968165085734, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.4985795126785612, - "sentence_nr": 29 + "score": 0.22090835035926976, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.1598921499894403, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.390187618292215, - "sentence_nr": 29 + "score": 0.3394587857367724, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.2400540439585043, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.49297433772099697, - "sentence_nr": 29 + "score": 0.0008865248226950354, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "om", "task": "translation", "metric": "bleu", - "score": 0.4806604068305994, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "om", "task": "translation", "metric": "chrf", - "score": 0.664228268001068, - "sentence_nr": 29 + "score": 0.12451389734392344, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 + "score": 0.06061016244701235, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 + "score": 0.3480533968220821, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.2340216139262901, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.45184273575809186, - "sentence_nr": 29 + "score": 0.3107132702855867, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 + "score": 0.06622410994100032, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 + "score": 0.42506963891617355, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.16533113836624475, - "sentence_nr": 29 + "score": 0.06510536366860005, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.4074791764578974, - "sentence_nr": 29 + "score": 0.4267595335841956, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.28547397706062927, - "sentence_nr": 29 + "score": 0.0908223691374129, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.4838477808123968, - "sentence_nr": 29 + "score": 0.35086527201687273, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.6053011982655683, - "sentence_nr": 29 + "score": 0.0504505902029893, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.652613765735072, - "sentence_nr": 29 + "score": 0.3325735546737679, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.4229247984636106, - "sentence_nr": 29 + "score": 0.1102887395214814, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.556465536088555, - "sentence_nr": 29 + "score": 0.4443176362793868, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 + "score": 0.3452527406212403, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.3471790743028735, - "sentence_nr": 29 + "score": 0.04445259375670958, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.4458106286047354, - "sentence_nr": 29 + "score": 0.3108884123742351, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.3555508425572384, - "sentence_nr": 29 + "score": 0.06793347054687501, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.5387745992013905, - "sentence_nr": 29 + "score": 0.37328029958450787, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.1709686260975486, - "sentence_nr": 29 + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.3940091304204109, - "sentence_nr": 29 + "score": 0.0008383635144198525, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", "task": "translation", "metric": "bleu", - "score": 0.25958657290343434, - "sentence_nr": 29 + "score": 0.10082211195764058, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", "task": "translation", "metric": "chrf", - "score": 0.43162699627918094, - "sentence_nr": 29 + "score": 0.39430575805608015, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 0.2213908395073965, - "sentence_nr": 29 + "score": 0.08459573412751416, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 0.4213527844474163, - "sentence_nr": 29 + "score": 0.4172605432414846, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "translation", "metric": "bleu", - "score": 0.39696685122270786, - "sentence_nr": 29 + "score": 0.08986406706995408, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "translation", "metric": "chrf", - "score": 0.5497060467823045, - "sentence_nr": 29 + "score": 0.44470674434718094, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.4109749814872678, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.12508210748173035, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.43052214251110127, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.15601281434649325, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.15226733582025143, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.39627479466482446, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.2512105738558467, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", "score": 0, - "sentence_nr": 0 + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.07135414938965279, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3801021795977089, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.0007433838834374071, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3362234868051281, + "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.07565762629954577, + "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3606232238015037, + "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.12594843055469976, + "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4353555563309006, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.07142908588092715, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3642310370662869, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.07679233641842272, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3862824156465965, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.06574770404484663, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.38086180678047993, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.13105556640848193, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.15600871137617922, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4717352325952083, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.08673909463463786, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.39672072250261947, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.15095832595105924, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.46999349033543664, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3145998179666375, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.13717476208873386, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4016394464190868, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3186898662502609, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.06656213940646744, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3842510919126927, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.06946125044973972, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.37972229376763555, + "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.06278759018603328, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3835626087751843, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.07801890264772814, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3553195236646342, "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.02074180194079426, "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.25643534797086653, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.43976907726931086, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.10101113530957895, "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.384568799517898, "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.31773604252301485, "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.23436994609974687, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.18083281963129427, "sentence_nr": 1 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.056200079175203074, "sentence_nr": 1 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3452518222522092, "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.15996142821020284, "sentence_nr": 1 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.42995669154818883, "sentence_nr": 1 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.06922310590511903, "sentence_nr": 1 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.39694083278594716, "sentence_nr": 1 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.1339039164909805, "sentence_nr": 1 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.44979655276903346, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.22159156633820476, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.47469899099393, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.13857603724877052, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4531342308478503, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.06775586518289999, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.36597017334019843, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.08533222289741706, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.40809636487511675, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.11006586190644709, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.44843770079885176, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.11791514636772135, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.45829245019901393, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.1384494600711195, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.41953659012152644, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.008635616559445383, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.16760779378610222, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4362677560900551, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.16234678312329395, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4114313966468408, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.14108777831558816, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.38610201135781486, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.07152747748412269, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.38927458491364797, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.14163299203710986, "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3918120503690342, "sentence_nr": 1 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.14163299203710986, + "sentence_nr": 1 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3951425639999114, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.13959457580667745, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.39278180480046854, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.212269197708987, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.432532095093549, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3992714815075659, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.07124462426516306, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.41510187108032215, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.16580720845461236, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4090525704213402, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.08630492424721987, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3317478812781943, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.07256581912261388, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3778626622264389, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0889604331153271, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4174106361046784, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.35972456016417403, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0857750978817917, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.44136113805162547, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.12439394521251071, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.48382498181532896, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.09453418134278709, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4530199895993529, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.3988248173608407, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4420660206317646, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.1200100437012302, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.44442321598107215, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.1651448129209979, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.5035927049098079, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.1057412571039566, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4429339067969458, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0645756376303251, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.42302002481751566, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.09402885411258183, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4323274751516209, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.29764050036303846, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2883740704360469, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2604470328007762, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.267065521919161, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.13148666942655857, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.12243763324014527, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.06752080860674345, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.3920675082035874, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.26975886482101524, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.23716589277972214, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.25934628189580383, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.14394171731439506, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.25058279102061404, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.2750949112536697, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.369396410785335, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.30886876402238045, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.06639671070426982, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.4050079022771937, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.32906029723232294, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.027675420219657812, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.16631693106339326, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.43025731672242257, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.2799307164966019, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.1910051568962051, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.31549317456416015, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.15854552704770836, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.30382216559902564, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3445489778722215, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.22034235744543199, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3992343412084987, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3768445224292385, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3029170225422197, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.14327475199246492, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.4396910566965852, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.18103783785102034, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.33182702236316497, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.1275824962151066, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.36765996925422534, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.07319259674455142, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3270299166479025, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3689308416952914, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.12454093367377822, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.45400457519342263, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.29211251612445716, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.08742637130044478, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3782754387193616, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4276317128610827, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.08016111055639634, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4056564162743549, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3471416148922459, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4473497453896118, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.31639773530374476, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.26469697944333787, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.46740195549409447, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.062198321250135094, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3837740336383876, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.07506636604929029, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3797922048220493, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.3843618124722185, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.10567309578898446, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.4024349171516437, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.32603788130544104, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.1626633565563655, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.16242124033839386, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.14377784410436356, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.12860232766612728, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.4883034327593629, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.4175745705628701, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.26376811155857266, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0683913918538176, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.31964124359750967, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.16961420464787738, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.3243082758151494, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.13894512516215204, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.483078120317575, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0832724096908118, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4097982251907115, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.09843551021314972, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.44345815368179514, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.14797957986848845, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.513739080081145, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.19557790190470636, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5363764564720104, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.39516134977471445, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.13548943675142955, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.46561819719767894, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.06222483146893852, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.3832766336265944, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.1264407220858752, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.42491280666939946, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.13488308637805477, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.44243592943016613, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.17119491085533964, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.48716868178651374, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.13564915029310812, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5124402132264054, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.15357179047039304, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4395965605263733, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3352216651363677, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.2926736955448575, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.33910710471992317, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3068634134265278, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.21066917744439353, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.11319164831477802, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.41093314858673247, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3662862822569537, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.400596854878379, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3536122668781117, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3877466554678465, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.058474735537506775, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.346711996349685, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.32876484301179987, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.14849717699290216, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4416362145529488, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.11642798279128005, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.439412293625208, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.11467045422639609, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4118325813270988, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.04984963984762715, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.34056245241432903, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.10648557917276309, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4293616798847581, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.1224748001318708, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.41768426353539356, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.08812222855524378, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.3956725101287399, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.276493585243019, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.35438832887300664, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0862684017016977, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.46311606179286086, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.08054744999594665, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3910533825433727, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.42292922955918455, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.08905416987582906, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4634600994908148, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.09910184808869367, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4062669521282858, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.15324215252205037, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4546414930698417, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.13705597155185278, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.42086210633701837, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4018106536047614, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.10183633383410676, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4751488277953516, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.08115687813068284, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.1402219074856109, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.45645548689619725, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.14370950122782516, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3334514572265135, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.21268091254698024, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.1520044508572736, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.14754324776249525, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.1493596720301927, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.13418234666119208, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.42515671066046573, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.16498388417305654, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.348862726257721, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.15101184486338365, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.15175485095994987, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.12067839739874531, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.3233637515119462, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.31176467991525436, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.29400349942844667, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.17262205720154453, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.1656224349286263, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.08879506158981211, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.347971425272793, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.25467468537876675, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.23230258637043677, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.3038489261116855, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.021201194751007294, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.29393541924621686, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.08160236983918483, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.38567653709947824, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3636314071779547, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.07352808725672978, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.39849401484916575, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.1601444942465269, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.4576200820848031, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.07609797325833854, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3951828103961763, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3304089312918686, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.4573525700324587, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.09226934981186162, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.40757362998605645, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.0736975215393763, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.450470065659465, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3454543703976745, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.39175069197918183, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0948243550653547, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.40688622415675096, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.37941443386230733, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0754791629755296, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4115037991203147, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1444984020870621, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4485053623705821, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.14266515060023502, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4205762044754387, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.27437048069985176, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1062877417466757, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.38750616531019433, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0622376426945986, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.3214294905599617, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.09850470636402667, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.38526614122005187, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.3759239923327034, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.3693281437350684, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1257482552973572, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.396228613282852, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.17376142320673926, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4494840281694199, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.09628144140511948, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3988415038006601, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.08810203169380636, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4085631076024389, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.10152372886902537, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3274648018276542, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0656946802681167, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.39145078362684715, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.22759795317133963, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.404462271481354, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.08828528448332856, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3845635683167561, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.118369067191548, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3570955668970739, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.24782633328770076, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.1056222479945408, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4147493978520222, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.3490597215692333, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.363944181125048, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.3406214634850959, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.36559164703469177, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.34592988636867744, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.2503407008316354, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.18358987342790867, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.47842211510000643, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.40917168792265945, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.2907747554493043, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.37948468164443433, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.23231306174211885, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.29313442741821516, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.15565663466238167, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.45102089786807525, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3349337342755207, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.19306612958933164, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4566094829965023, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.08247696970055073, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4056732117408629, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.08599019784098516, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.38046480599975824, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.0256345300045686, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.43183586900957266, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3063541349224814, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.11947870588075608, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3759204930158301, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.30820582392513496, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.24438387922480115, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.09603814203629989, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4321181025319477, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.15165087037620367, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.10988031996776393, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.14384707977041108, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.15154293554201603, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.15332196439486498, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.1366803905961902, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.26639298093015656, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.22666622746450207, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.12132942020746357, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.13870031722176082, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.14848492233400512, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.17231483245958562, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.26083297460286664, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.2205187870837211, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.1680732599075483, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.1612742953037833, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.09719895896945802, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.13288058909850656, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.423605439146263, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.38162976993876024, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.04281165799178987, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.2509007123366836, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.20795243503763636, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.14465863776176832, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0937099995586274, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.38638693017010634, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.06087893264282183, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.30404764547641244, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.41200704988717746, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3886887515710143, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.14004327215895437, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.40026907984156535, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.15629747573563804, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3486357126192648, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3714248697825236, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.04255423670382886, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.2545212986668611, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3669589828288568, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.2877294168054545, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.048298770203824865, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.356719915230056, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.109333912337143, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.43946533504329827, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.3875031655866923, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.14527200081334513, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.4245172781893951, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.14491394634037813, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.42283276725533414, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.10635098896649771, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.43764629149971646, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.3774972710926166, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.10434084599663213, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.44542543341868346, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.13212441564006142, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.4851083447910682, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.08158761703149583, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.41046961809624866, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.11015904355748653, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.44168399257521335, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.38182147212430423, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.08774452514246735, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.45281303506820253, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.12099786399361606, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.4447762461237164, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.35906265614758676, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.10096459770150681, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.4189740217714419, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.22294850195164284, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5198779161956808, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.05462476108190564, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.3939806692426178, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.29176887749452535, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.12489813745855237, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.45100840448088525, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.17572052479780473, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.4667387769636358, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.4278609837970672, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.43910094482734396, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.3506155502407636, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.14055652045127187, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.473241590986821, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.15711076787374778, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4926610996660017, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.12253628106911543, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.41098604819939544, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.08517707813747888, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4298965032520897, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.1822605494174467, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4674281825701334, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.15060224138362507, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4960433081987429, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.06416670621115099, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.37914976744210205, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.16678437441301863, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.48024716052007455, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.19769254283956292, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5134092833700632, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.10351511568904229, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.3512771276108106, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.12894382376610666, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4774936497378186, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.2822733965937896, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.18568750775369716, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5053829905789087, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.13594665641498668, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.47621282367548656, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.12066287439499573, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.40225318320388664, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.16231893029395061, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4858308027555531, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.13721346938175555, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4615942596492787, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.07617542321559437, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.39858919565540674, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.07090251712658449, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.35520940942408, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.14262794392495703, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5232138614315397, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.16700393857092563, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4647429119227333, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.11244423294201641, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.39638666871963296, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.044924326658115875, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.13664244796691394, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.43278158199649547, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3781094023262652, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.052244516140907096, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.351436961102141, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.26865126568707876, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.13407128770012228, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.1426346476984381, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.10415314128924848, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.1266453888967545, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.43624812371685906, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3105036235047128, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.30259031153099203, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.000862663906142167, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.1494595377548235, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.17419420900027405, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.07291105107725455, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4474906527730671, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.08334085822278188, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.40282723830388284, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.16487909586055954, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.44274512789286224, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.1360307408435953, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.39263095823622246, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.09480078705006485, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.44714457710624717, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.09262646486676755, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4550476833458679, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.048201474120811695, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.274690061108597, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.11701862696300212, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.46140785066979895, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.09775507836117996, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.16354663154362192, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.33037125702748205, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.08714838249931423, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3692825189624461, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.09127577115927074, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3300669374207929, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.09136190831662618, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.13220679623213535, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.11092163750960961, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.09283959660667528, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3978483671635258, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.32927950263936856, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.10056314185255186, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.06992171867383007, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3707212650272349, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.0008790436005625879, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.13930120799883589, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3231709973877731, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.1680802224904863, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.08434660455803612, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.32335639685468925, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.1634625788420781, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.16025807894446958, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.18409427327298736, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.04649593117719446, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3123287906475391, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3333615125436495, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.05397266389085381, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3006666778870249, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.2211338091172957, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.13172601051893773, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.31543864697695867, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.909878624371155, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.9494599978334789, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.6484538568755306, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.8387015535622947, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.9625248317849852, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.9799603794887166, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.39650106263626994, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.68092489158442, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.8770669977168674, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.9465430749689367, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.447394559424499, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.7668788405207637, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.566119739615098, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.7342868017789353, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.1126885377086926, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5312741490187041, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.4106104060507146, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.6978219979604, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.9625248317849852, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.9799603794887166, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.823872392210652, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.9211718482852349, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.17236959754271308, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.17066745219661572, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.17964021028231922, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.16772440591199625, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.1761234687731259, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.2873490640300303, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.09041416838409135, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3890881523525476, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.09240248103148029, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.40196193955157544, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.04294724676074863, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.28289709141495645, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3264316634099254, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.27836308064875176, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.10044732284778746, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3430602024503212, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.05270938682743268, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.31252925174187013, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.1075481111616894, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3738814601144911, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.15334066204940114, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.15663428928670334, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.14942771278303218, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.16153757205147629, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.15592867267790575, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.46677119539598194, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.12983585863356562, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.40817978682363515, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.30560550161855565, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.05132579587372299, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.2912647488684406, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.1179671428128192, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.35902184995166087, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.29668758510830123, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.22229988701881476, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.151253555027421, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.20433140304565395, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3418929341143443, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.27032439709340095, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.08584237196597336, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.36740704811992303, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.17118252592469316, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0529715946034933, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.35557346479092056, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3447636250916266, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.36010213387059153, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4082310711203842, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0649353067551241, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.40762080099643877, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.06197569493404185, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3338415573583233, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3967113999971865, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4128330698627673, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0722283716922605, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3701322940114362, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.35685082927498357, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.05664033266477341, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3784614210934525, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.08114944360388783, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.39320041394379696, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1569159469136538, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.14702176025137792, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.15207203397909086, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.150473170651542, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.15718151788438975, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.11334755958850191, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.15677069289729273, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2916166730601614, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2596756015668518, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.16085808831478232, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.03602534230965631, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.17980618443164004, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.38870674200492367, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6484380084879691, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3961285597009415, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6148751441350505, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4923751299732868, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6853756490381199, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3996712647649035, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6353525755760105, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5115346945020283, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7037574715738644, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24715873794308874, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.49051792813181655, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6152980280400979, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8311281590297233, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24508104771894088, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5725552336126134, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.33608213382072566, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6155314069125684, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20801258614305904, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.26703508536995574, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.35315040956049437, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.625895188503691, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11133996756497437, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4410280353998367, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17374951565433233, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.45325597884524305, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17743299460161885, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.43071271897416463, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16052654068024738, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41580120868053494, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.05963579607071745, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.31139762378406344, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11346446511593337, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3675317022605926, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2377604053257556, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5662768009060447, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10742716472890976, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.42694859148910824, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19073363590503933, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.49895382941569383, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14745870033404418, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.475170637938921, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.26459538953931094, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5272178908335121, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.26801022984888695, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5654883864995515, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.21665407194210906, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.4344921442639243, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.2735429726790281, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5644723203818537, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.20223322445648179, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5084057058209687, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.012201453805310429, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.063050817196087, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.2868985878682555, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5832214090256616, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.25848476545940924, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5525933856866961, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.13019082899297843, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.40512126305429846, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.24071298960902482, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5438509851618877, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.2063890416514164, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.38567678850872256, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3295566054952435, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5816133441895466, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.3563758622144919, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6037023613177924, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.3574583793293068, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5924115119819969, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.37994652561206577, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6464467277069994, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.4206507730319955, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.678851303587664, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.35367180741660353, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6344846206551544, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.2632018059331281, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.501302719796297, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.45286243450930924, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.652736896100196, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.20586736678432452, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5693016623172978, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.27075075499555246, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5374328610523021, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.1455399826828606, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.4504825146558032, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.175866555062937, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.2758687846643748, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.2948978498692003, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5529453973837751, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2158914621804855, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5448184155666022, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.22292726306270316, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5653789747970112, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.09362261118571368, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3452056942265759, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.26930937054323245, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5410704185827219, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.24634765861867908, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.55968513851572, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.39152357647177133, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.3608351361947582, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6215415104007418, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.08175340974854195, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4308342322390109, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3185785286756486, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.3268233487541633, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6084114123608597, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.04759937639788563, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2500653935141143, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.585528867886047, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.18031307339768174, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.522164454804456, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.21403222128228389, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.563121432204311, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.18917620656425485, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4346170232980484, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.18505378795140082, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.47051087423292237, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.09807167131529582, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4646043403137081, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.037874984245935134, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.13083094614009624, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.3522044895579804, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.626995375818154, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.20064110494011925, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5205761630334527, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0684792839692368, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3138898863773231, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1573857459340795, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5347526444819753, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.008180069062416927, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.04605877529742035, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.13904320686250593, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.47169365083525167, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.2999092588227898, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5505916495384416, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4054983797456263, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6264774230839022, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.420450507904553, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6503146347305717, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.43870712112271204, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6525926696001584, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4207445490015154, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6496192656497308, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.2772655014585435, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4799723286048352, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4577065720375266, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6729985527773988, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.2516768028374535, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.49572209766846287, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.2958351954606211, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5202221091638364, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3471036105446511, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5663019495273462, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.009070964338765818, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.06852404470758497, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3212066202235163, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5836558214123343, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4263684749347053, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.20051119758906127, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5334791309401924, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.24894072982768842, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5212235893093335, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.23724642034775328, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5175129869169551, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.1849419409628554, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5067677916637257, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4228574070038002, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2737513622788043, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5787848381439354, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.11126509848873964, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4338923576538663, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.31311320826536454, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.17236491061326006, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5548663878579595, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3540506408782035, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.08906092883748383, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4317746285352776, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2562849004088193, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5767019342009202, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3535002370419364, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5959879218348465, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.393613605227227, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6492198447661237, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.393613605227227, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6492198447661237, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.38333108639273095, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6252821653079126, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.19851743023355672, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.49793621556542356, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3878152533224771, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6512757512743351, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.10954782904363085, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5090382887002297, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1614809742656655, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.4145218112165384, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2834484329788497, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5201572704778937, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.006569332862878646, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.060864196135666904, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2756885721075884, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5867077870431389, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2465888500427759, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5221084445696768, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.35983766090218355, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5862251404739759, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.21147734744561483, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.41020178654369294, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.3563982585943877, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5378970484635915, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.1510722413165652, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.43592329727028295, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.13240628161243978, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3347576434758551, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.45865525158396653, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6547109311136894, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.31372333533981844, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5741396495481692, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.4583603882613907, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.671355324267905, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.10077062063331403, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.09760482860544632, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2600884210903425, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.531430106996609, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.22150370805587954, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5463488388082953, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2971752224486841, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.605133664481872, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2329856851831642, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5405751250637106, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2563564295134795, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5499025328773104, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2500653935141143, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5098952451698188, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.14574402656519908, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4275543759804943, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3410035628829697, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6673519558586546, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24750028117795922, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5894646098566614, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2687379663485886, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5826619907747026, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.15897333608001968, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4496168003395693, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.15089318423122544, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4042478943311393, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.39461811323775403, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5655204109921267, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.2786312783602775, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4836796407825139, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.41756686236967944, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5616829345739638, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.4205004825822372, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5674537639314233, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.40974323819644953, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5368112087257564, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.219672574669477, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.37413906702142435, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.45613731449823464, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6655742412177843, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3116520879159789, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.47517792402030584, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3627923367798331, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5255399246733422, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.12870376210497989, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.22151451171035633, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.30181468526956173, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5261802780475523, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.25564177137418986, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.49870011615602194, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.39579112101105834, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6431490866428237, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.38189567401226293, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6154314825900052, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.3436153961225413, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5954254642696512, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.4339219137216798, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6338401824373191, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.27946415227589155, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4938296655037709, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5621669807462487, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7520069792061377, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.26925601229087914, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6050395148484196, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.27336087678628246, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4919779927233182, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.43235877156651625, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.614485867381761, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.012951112459987979, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.11882277038397698, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.3272963527043486, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5715613564297359, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.15985840708020788, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.44951053332729884, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.35253338922743144, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6487975154557831, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2126707920684064, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4659908460634765, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2914880531303981, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5962886968213414, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.23944666570758283, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5106509239874657, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.17920531400657588, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4522763055702811, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.356290219128095, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.669942335348411, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.26199400535088346, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5804827870380099, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.13442725522288548, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.39067504005337655, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.20533250289138671, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.49681810344665644, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0950330051810703, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.2535554509913635, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.20378172261136207, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.48600008237332104, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.27217589854489177, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5756343666825848, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.24513414885202045, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5476647609559218, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.23240102389974368, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.4973274282641141, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.2516768028374535, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.47249781871556595, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.16105265992626083, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.404377371664668, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.15813859795767055, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.44607340294350173, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.35253338922743144, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6286864313376063, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.20533250289138671, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.45975635079501215, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0698714799763323, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.18217918401705574, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3077422016953529, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5943673820353285, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.09564571510780719, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.1907009110214351, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.10353153556093725, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.40215410362634535, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.28467215304840787, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4298052820106505, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.28653528640783255, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5053636612097852, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.17979384730979156, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4177311931467539, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2767429728676341, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.51183750216717, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3192837057100497, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4886865884781344, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.19358934025667454, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3468163065453778, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.35728152609132297, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6216765378447047, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.24328450115124742, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.42756232255111404, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.383259260976792, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5920204217586964, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.19180992590551618, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3308002143045663, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.22305706065076847, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5202782711775973, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.15426765225005337, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.45289625960131974, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.31326946419057006, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.591171976889058, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1702602472176709, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4366640707779677, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.2615659486493292, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5126931977939474, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1840585956032067, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5282240694221736, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.004770195810675918, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.03746566558042944, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.35728152609132297, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6309384943070174, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.23240102389974368, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5335795368341105, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.20801258614305904, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.41520266266344963, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.31487248334376844, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5905881236136059, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.03509992242758199, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.30204473943342836, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5394635390078353, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.7096224667917136, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.8862932371217843, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.5294442646627652, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.7281375072835307, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.933651069586263, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.9586507529693243, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.933651069586263, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.9586507529693243, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.6337520241233826, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.7734740773636255, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.882190724997149, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.943123392401343, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.5842771441222545, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.7733784586326149, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.3359230828063256, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.573086119969458, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.12531520484413727, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3550670027779894, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.7106361351765512, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.8390104107504974, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.8627586293513119, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.8964369716535558, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.9082489095559809, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.9677853954871374, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.33713757310040376, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5731908178757754, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.4162915990459618, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5970097205621886, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3816408219023713, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5784105768028126, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.39234342738825634, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5972186089812455, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.361250819353898, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6105988260114965, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.20134984470993175, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.47593263188822477, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.45319466339683195, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.7044338153663002, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.18236198178601878, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4546828651423093, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.4710304492059704, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6951644805792349, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.20786721468392394, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.22772581789894308, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.36463597249757107, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5937597266341832, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.19129143021561437, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.390473445537339, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.25848476545940924, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4897308313348651, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.18398226639192106, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.37285010531146734, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.18116830735735984, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.378028436956142, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.2539169853234758, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4294871148542432, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.12731505388718733, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3354785767663773, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.4055828482909762, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6637253055098513, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.2541277777982055, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4083801144711196, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1350785139238714, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3131061676292571, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.24814514148154546, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5127275550938416, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0440854328121759, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.053628247089794495, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.2500984051960647, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.510079353939284, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.27057949011516347, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5644281635271426, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.29851690541541476, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6224209860013706, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.26958884543190903, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5631664732610485, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2503955135641583, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5411247834284307, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.26115021337737276, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5520240492306279, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.1676495122493199, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.36959105080753546, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.38364238388665217, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6777989738947793, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.19300403619224038, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.572822450824776, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.20789192174660942, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.40675321225858063, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2799103318440567, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6172633503183605, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.15492402556203205, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3125746694462918, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.23233851806966574, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5357993047462365, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.24285172240675165, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4655392375590772, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.29796912700911177, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5158892363484622, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4005296397635166, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6201785376974677, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4005296397635166, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.591086403119955, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3114493863658917, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5682352098535879, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.008072417039197614, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.06388295238713035, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3990867885395787, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.630492990358084, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.19851743023355672, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.547468247371695, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.341534333789316, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5541872808406454, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.12031041493621579, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.2540681992986826, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.21910942711629067, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4572726847360035, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.07276375309803214, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.38861839385008856, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.17377261603583774, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4342710497791623, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.15956483578595942, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.425693420655628, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.09791579531860735, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.40432986440529917, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.10423563468216913, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.34390006822858976, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.11125382292406938, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.36362589237918785, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.26330018250960563, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5604741582178225, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.16285656455323885, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.41845720590580077, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.1752818941059842, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4632619281274353, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.06150895863726817, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.20484322521277584, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4996688420059074, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.35205535634937346, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5769772651090223, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33631398011857205, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6332428715049205, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.2323385180696658, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5019509292309764, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.19726472415983368, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5084335265908847, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.22897967367089514, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5158963534678644, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.196046355324564, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.43350415347352517, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3234043476847562, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5909130619522283, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.19057582910221915, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.44257167922089413, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.21889549804942124, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.48502913647746226, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.2799506947952143, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5700777642587023, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.2904194745959351, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5399262338172586, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.21108332811806296, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5847750744232335, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2453238227047589, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.55017080577881, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.22952177306405494, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5279520952576137, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2950615456579434, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5675143775417766, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.18643403650822063, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5048280010205698, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.1543252261021413, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3797469086855575, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.34749088141991274, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6635709388374615, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.16617179744038174, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.48117026601244495, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.14383758787403153, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.42888743368788473, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.23584494013034235, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5389135951883217, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.011961593826815614, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.10171812934151993, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.27271804425850804, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6231583014699292, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.3520774812078196, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5735788202105873, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2063529291350913, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.41364248023079064, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.3618488169166299, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5708179622131996, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.44536846829231563, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.626601305779226, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.45286243450930924, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5635810887606836, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2336554865490948, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.38695981569355575, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.39673576824033097, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.6640084839050983, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.20241924705494113, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4785445547362629, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.3291745670182042, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5666889547785301, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2026004770366011, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.30270818881631195, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.27353798204430885, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4703704190720731, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1506914981676572, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.49409850038698094, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.10586140133972588, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4674053477944039, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1712766252338756, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5225554962608486, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1549337617358287, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.47063780888858964, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.087593103737711, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.41197148322773003, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.09993298280365949, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.48180385986334856, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.41169955008329745, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.7046826076821049, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.11760297043792217, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5037771891801089, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.31178681104808115, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.6235184561135673, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.0054987473538930624, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.18502495276209577, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.49774903659564634, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.2797290030028961, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5092945860838002, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.2703645496410475, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5129310433304475, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.2709079038456153, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.447458019441992, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.27075075499555246, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5079958750910802, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.19134492872562123, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4261251088174477, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.043167422631559454, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.14513634182269314, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3428111199165518, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6267480478468405, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.20787389114353938, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5395612843354369, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.19107912313367556, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.47457026427329674, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.18227211511988975, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.26739926509879147, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.2244921781886412, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.48547869147117434, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.26036802768146033, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5255752089611478, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.2492031334256811, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.4923163374806021, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.2907608105126149, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5445465034944268, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.30219157030008637, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5492288689573782, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.2166046272179384, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5246154268333804, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.17714787947168362, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.4959629506636555, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.37819926894757755, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.6132811919990925, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.24793827875738764, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5551406879143232, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.5143871785925975, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.7463887744436826, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.1568616676699092, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3333409033359294, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.2256379391347521, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.4863283179636851, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.240340920378981, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4624667456597986, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.43104504141832617, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5953439401847398, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.27907188689389983, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5093017176589221, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.2276330877377012, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.43289197888514347, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.28652679283739385, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4341260827393413, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.21815383167015925, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.37444773636111656, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.38031866584113244, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.6100833406476935, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.22991579208155866, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4195683527550329, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.3529455532976322, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5455305267266531, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.2119156724475127, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.37835612878198044, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.23150355132919254, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.48517162463112556, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.35592474790742606, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.5565115125775245, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.20863984464930022, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.45879801940552783, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.2695149221768555, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4713033964653895, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.08839914053546608, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.13210046935115544, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.22669629371608005, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4780713176952279, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.2062051322624683, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.41192098101671093, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.4351160994155454, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.7075991953765537, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.2505547110465864, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.48036570052288885, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.2164949874511416, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.32736826808813946, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.37294595046144213, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.598127662881266, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0034593773364647584, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.10587910341470286, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.3295957765387521, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.5207914581240252, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.27075075499555246, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5201548999535662, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.27338789256007584, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5429269981031598, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.2075953797357176, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.4344742362498603, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.293816771214877, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.4842449940538771, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.2063529291350913, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.48123766976272336, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.11546772122737221, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.2604812246395473, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.47203392907202957, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.6897980578458137, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.26709890828869226, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5611797797204635, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.3658141331541051, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.6386889736882309, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.07964662206989197, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.08209382469898788, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.24759502840925565, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5330580248133261, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.2666372228396489, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.5839132669613946, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3703971546860334, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6509854048597393, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3020089249326176, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.5666791239956741, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3169340575963432, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6047772126282382, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.2728224724839342, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.5858336859170117, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.1649662542496744, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.44732894301721, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.37801805838989, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6674788880655028, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.20812209921683228, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4762583476044399, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.30140436874237964, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6234338871585586, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.13582344277578873, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.23233050093887114, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.27045803893058445, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.5815404493073867, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.2667836062177809, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.4889374373828587, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.1515551103099189, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.49455791760408774, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.1059786102229136, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.2561557976916047, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.15507100728722165, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.48322409198286276, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.14889095388455822, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.41536400762130277, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.12486557620383446, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.2904789102327634, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.3971731969967373, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.6965186338399049, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.13551668809076822, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.4424571214083723, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.2697482929758505, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.6331597127209819, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.10369816700638204, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.2527691069954848, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.11930191477839873, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.26295403442210813, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.23919877618601593, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5302876334280949, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.24664751641319077, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.48702383483350364, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.2103019561790119, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4375454771782611, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.21396075329540654, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5173735729399421, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.22941797870527758, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.42186981875418683, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.1465911128169728, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3079988190146739, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.35317260323737165, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.6408594869465616, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.2736255491551285, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5566377410597074, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.11568463948689758, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3673356694093524, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.2940297877008057, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.542815022290297, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.06203726059862019, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.06824072321166202, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.2119156724475127, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.48896874501469645, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.3556610867487636, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.5896623713361566, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.43209473956081024, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.6466471725002415, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.343734330975999, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.5961090979865409, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.3576272831971449, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.6244083944373213, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.4251503518017069, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.649231006554981, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2312335616732656, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.454430450698693, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.449694989832495, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.6848441845378601, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.33036326194813054, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.5335695187570914, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.24715873794308874, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.47778531553578674, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.4173623671609102, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.652879990525409, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.20128907818884004, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.4174485891003447, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.3020162743522857, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.5382164286742842, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.260409852867913, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4693600515228538, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.21544027588567594, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4576381595573422, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.2545286403887288, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.46127229234959366, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.24328450115124742, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4776388219964363, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.25425921396874424, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.42317726915360054, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.02275832756960615, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.05875660911445021, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.347369324256538, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5365384741618354, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.24053623916497383, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4750171806233396, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.17580263472776825, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.3076957578052248, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.32494178152665526, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5045476589637113, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.06347730185717815, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.29373397057939277, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4428156698365815, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.24874987153684608, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4814988208653403, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.4140011428776289, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.6412021306400884, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.3759002268420169, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.6120997127625288, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.29037747307996287, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.5309836036249713, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.16052654068024738, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.34244874984732915, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.1325275948331608, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.3205120958231771, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.5065435153109259, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.7032321712710279, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.1768973089848016, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.42719621083968545, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.17901446805295448, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.3256054730322296, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.40124730128694536, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.6826212173562594, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.15196406724218742, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.16323864864249038, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.01246150158758297, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.10376214592757614, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.17716893523927718, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.3125133953892873, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.22843578925939137, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.39333887911230325, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.19035778476657209, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.32011375391986463, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.1317872333008798, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.3352902644692564, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.18961182664205528, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.3020662042654857, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.017943510022106292, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.0529980883670151, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0027688975221579555, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.018304015812073582, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.1620384468018578, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.34746269165329957, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.22571464820591175, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.3765675993955885, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.10040883216956197, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.24797453521432172, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.06951973289421576, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2807452013801013, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.21529598963807312, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.47472255443386435, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.19319522417917573, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.5294666692683903, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.1876442538016413, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.45717296303154553, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.18002829271425153, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4760283281580435, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.18247822039542128, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4600264307679993, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.175300149784418, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.41324550460485304, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.24966398552210345, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.5875552237855669, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.18002829271425153, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.47969369143545676, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.2522249768533851, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.5507570621049205, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.15228884803717702, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.14339231237474268, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.2099318410412026, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.5209999254031551, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.22319344534343544, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.47255822473411646, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.3431794518924713, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.5291073153069198, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.2075953797357176, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.418796448457094, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.2563564295134795, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.5093318092996159, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.2866708602742022, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.571127006270378, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.02295424057510269, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.057552678044422, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.4630071026583851, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.667540987593188, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.24664751641319077, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.5273132664458353, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.23904922011090457, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.46170328338282635, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.27317735109275526, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.5633968676502531, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.11726841339493706, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.26372027377410673, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.18523686153564775, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.4293966276650039, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.17593291675420053, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.45966885600223345, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.30749506855677367, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5615365420131465, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.2539342198718324, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.48976692911803554, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.30768118683142304, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5089115971222962, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.19721007805842014, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.35728524815277984, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.014993257223678707, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.05313833864311007, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.25109549502043527, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5476903392712803, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.1961515501265013, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.48948565712613856, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.30685590287990516, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5269783493077268, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.14361580529268292, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.21488656415712018, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.028831974404303608, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.21532076850060974, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.26939482991021874, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.564348572305916, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.28232804221956187, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.639242930472136, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.24677721152898274, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.5655655793718459, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.15162461704349048, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.47435908632426016, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.06876893749716347, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3746922202825928, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.17515913581878667, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.5159640302059034, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.3114493863658917, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.6199795063284539, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3977612455119272, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.2958502265911963, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.6081864133794638, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.118073003714978, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.4725327044603119, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.18975464141824344, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.483345451709862, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.177282908048097, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5048008630035653, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.24609114091724077, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5163247162943534, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.20586736678432452, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5290915360201753, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.29019853911874177, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5949894213257197, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.3070367955852388, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.6242613917648033, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.16310803315335595, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.385251224878983, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2757308936630587, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.6126334851686046, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.21805912847922992, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5462064223958267, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2189685645981704, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4764545724040846, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.19770260950111818, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4864234961050757, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.1281637706417447, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.26820390293632596, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.17178273549483283, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.507628318520069, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.05670064571372339, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.16937000725041657, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.4219264367109449, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5790052627496669, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.21550905403743137, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.44719679117350436, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.27007601385228264, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4458020398892479, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.25300577345515013, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.44175219977629965, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.21147734744561483, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4024289879771283, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.4216050739426583, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.6728600493371104, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.2756885721075884, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5307227437376365, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.21991348529919003, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.50217348570388, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.14089726824262236, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.29585146611510377, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.19420534060688374, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.432678989151994, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.19946335945716726, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.5412386252302255, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.27966169949383496, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.537239861484062, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.32980384185673844, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6175883753955328, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.31318569084293774, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6117611701035811, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.33705661165643946, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.624031397469919, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.037115030218903694, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.1788805094660807, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.3860080723844807, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6983645725285298, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.27353964831796046, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.5222330343086904, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.32310721474905496, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6324724806146809, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0982831645960075, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.1272762669734629, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.20113161707875454, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.5185589580301894, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.14382854899355546, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.26400383568118985, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.21514404656488983, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.22551384015559367, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.1314926852888956, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2730576168913583, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.10759353804031296, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2469469161435681, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.02005744277065384, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.1517015859852234, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.14463738798777623, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2589218890086074, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.13690362900208325, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2762422431482146, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.10560514433971112, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.22163408505698107, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.05392295714154769, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.24000092793563588, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.10533275933768531, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.1454909685200551, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.09541186197466851, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.1446377197560293, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.30538385012782954, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5121153023805728, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.21889549804942124, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.3940841212708787, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.1882889817107982, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.3522812586532728, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.23766627182164174, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.44815154837573024, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.28175950490399515, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5034953110542267, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.006822057717339712, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.04590519704218084, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.4506022621318761, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.6366569943698206, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.2044537965576019, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.41662837209065434, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.2803589793821332, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5139077856801352, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.18318367941785624, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.3112497745580003, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.2188460064841618, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.4111151775845119, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.3618488169166299, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.6178847628712388, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.431319746325093, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.6234382849939584, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.3161306379595585, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.6012304838142994, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.3579300370931225, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.573996761928517, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.3080840787435305, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.4885219468370561, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.24325558783239473, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.39066410297361315, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.5356579160614433, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.7277322041803868, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.1738168213092765, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.43714714564599644, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.4265406506976777, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.6947449483028119, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.12284465579716894, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3000539818655044, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.13919020336387739, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.4856921552489764, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.31487248334376844, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5635244346599635, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.22897967367089514, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5334911242844559, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.26709890828869226, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5042111985234817, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.334422418242443, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5300778295156336, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.15215820165380464, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3730291976418706, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.008068095572196444, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.06582628108710774, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.41092285395615147, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.627562152141329, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.21255280545319827, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4650106669780648, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.21130045509267714, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.39738842872135566, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.2852158256303108, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5592687569674767, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.2063890416514164, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.36145113398437073, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.23158047038710655, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5061207857603639, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.4218999224827276, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.6489282208332532, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.26356793966181546, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.5191302272110829, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.20298700573422315, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.3905231106721993, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.1495562478801698, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.27927416817557615, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.18014267897960143, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.3494520347780682, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.09315655656376064, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2991354305100017, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.5247574818133272, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.7067933534801065, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.24328450115124742, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.5060248466367836, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.21262671745682374, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.5260755429226434, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.15235290224609707, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.28302702194787677, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.21486676443988736, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.36871818136959744, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.3563758622144919, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5746238432846977, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.34637568582379935, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5514391895148156, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.24720511037119816, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4462551342337241, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.35925132999878095, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5699819571052286, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.38359501185588124, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5602603893622257, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.23240102389974368, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4026544277401918, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.49510953811541075, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7352883459765454, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2981343875223826, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.477518453847399, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2026004770366011, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.43250936001873813, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.3564982433809234, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5955310812920889, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.15235290224609707, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.27566734637316337, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2269289302318667, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4761409742673413, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.23380867598952562, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4731313764465835, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.32326983669535764, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5757950493268048, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.2920934313715234, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4983574989743429, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.3026558282583112, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4694574798065052, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.21310996044302127, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.41481507363997655, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.014046579612901993, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.06227213209427425, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.3710777220377626, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.6158333675751357, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.22797230914240135, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.49559704046335284, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.2680102298488869, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5563627216923304, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.2082228064731165, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3453459075359105, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.23462825598816128, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4659487960951427, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.1529466247397943, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4275296567755792, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.309848051124064, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5622431891031534, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.2516768028374535, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5052262603078841, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.21090682439932437, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5250285784398728, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.1286693574614936, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4441562924808585, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.037035772240318204, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.13309517159270826, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.37081839104772296, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.6389376736347167, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.19360150634553708, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.47334926984767134, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.1416110914766342, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.45649224665512106, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.13700830775707343, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.269069871167757, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.20310341961604592, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4991920771058773, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.2380050699329688, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.48227504945496735, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.18624263881830802, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4914113027832365, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.2395446927992299, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4721484222602001, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.19228093786407296, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4484750286265722, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.3110340303714584, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5519716855578684, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.01702725324941803, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.08157175569560395, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.26887618694630055, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5500003011471738, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.12680676386575712, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4095969427556632, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.2533922732210082, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.546233396176927, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.12830494902442313, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3254705907811071, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.19601813136817495, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.46178888135061663, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.20251299853063762, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.36192674925462354, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.12015228994776961, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.31437848676811814, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.17979384730979156, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.331355254735914, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.07956863030147791, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.27552150746832194, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.11845246173688026, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.2839119989582471, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.16636906484233852, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.28479590380073244, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.37042346597404774, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.5429200608002012, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.19938701089073135, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3122788337958431, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.2152878500188154, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.36210098378060424, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0361216793750198, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.15096755094203476, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.23814364645160635, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.41371751248867406, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.3291256332376796, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.5670250015789864, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.3422882142242731, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.5278861608717469, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.38564863816921563, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.4887006722841345, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.23177043441348452, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.40414889866804304, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.24293747431834264, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.43597971711109645, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.18613958857202265, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.33365565390885554, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.5095567171193034, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.7144582369592706, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.31286911900957376, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.5694292229267216, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.23939069272230887, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.33856380909095196, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.3563982585943877, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.49354275608420073, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.19282932367379912, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2575059813192898, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.17208226602890947, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.25645334428566413, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.2489574113984516, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5438702135465744, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.25119117418063647, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5358947011982449, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.24117223077042385, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.512020635779483, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.3340823391039827, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5653541300306412, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.2386641827505274, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.4975758233208786, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.21543832976633895, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3494273034291109, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.3505419761309475, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5987069983604556, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.21815383167015925, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.4982958514803751, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.2297132059983132, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.44671126187287913, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.27336087678628246, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5594458824515739, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.2082228064731165, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3245390541494279, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.24241809604223485, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5139906329232172, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.23020656163897005, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5608590094117443, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.2988707080433144, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5286791480233601, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.29215021962379045, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5527751145536495, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.28375086204441347, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6048386743476, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.3397920703569073, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5935411202589298, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1807114773593583, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4487816742581557, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.48923776114221357, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.7423815677937077, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.23010506054463412, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5562150832865256, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.37456291810610803, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6571573359746666, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1351928705126306, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.28664413593577004, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.19375900698784013, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5286306121502188, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.1931328662607509, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.43550456875371113, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.3394516832204828, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.5487992573856032, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.20801258614305904, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4082367628634589, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.3623657040262751, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4992077491214507, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.2058069729608087, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4019360953517204, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.11350940547542104, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.23977961412737073, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.39382730058430515, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.6347386700093041, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.19418711990577714, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.41416182549648484, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.36432536049590997, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.6094788325888109, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.016148804908990694, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.059937646349923615, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.26871972706804337, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.5406826424997226, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.29222881654408056, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6120984237392771, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.30956660793759877, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6188773222172356, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.14588825992287732, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.39984326863280045, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.17890209808948412, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.4299320286626716, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.15196406724218744, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.43726288798668184, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.09010469109887134, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.27026073072610995, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.5288343547782808, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.7969290554899139, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.19772527187680455, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.5389440496417608, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.4330803865682828, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.7589029338970602, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.10624793541906809, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.32563569843178114, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.23416092054895646, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.5474842887821844, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.38935973617512226, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.09100730294865149, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4152991006861775, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.12576299804399627, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4275740936545043, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.1258768401036426, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4861652311370069, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.10541304780958503, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3583012077811813, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.2167522404231326, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.2307406223440206, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.5788151459845872, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3476349310417924, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.41141702892520243, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.011825748052502034, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3840028089763543, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.16421603133867055, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.29137424728903016, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.20812209921683228, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.31687414190905666, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.16498223460029865, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.2972734576062982, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.22705489693606415, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.39286999195229216, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.18893041617782175, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.32203554666172596, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.16421603133867055, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.2938925751927021, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.1876442538016413, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.3540256720775971, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.19591247075997567, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.32778005458140924, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.18637067743823652, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.2985651743860094, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.2529258575884984, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.37189904894232945, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.16421603133867055, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.29137424728903016, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.16421603133867055, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.29137424728903016, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.274614810062371, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.5281783547748619, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.18452698284010527, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.37129342404244153, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.1649362336939456, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.36964185672093963, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.13727969222453051, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.29409236920754495, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.09261843076782389, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3406703374268109, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.02874370235825497, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.09875426372916535, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.44770953300438343, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.6419549710447301, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.18523686153564775, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3537843522005248, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.113156606711698, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.43802319023845615, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.13466364265367983, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.29553023036585113, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.08468606881585687, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.2488373778948128, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.34396207830145586, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5775887851128505, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.30371045098471633, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6140790369362206, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.2252297536658673, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.437729946490623, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.17546787062208544, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.40827921653488547, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.26019126665731623, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.535666221551461, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.10690275145666722, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.16763819765042876, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.33047557311918846, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6134378350119151, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.2469704880271774, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5214737751636569, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.1839576031944879, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.4041591260008859, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.2744333720270393, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.545433501864422, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.1839576031944879, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.2608764081982116, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.23954744997508795, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5282078787711029, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.3004961314114194, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6112720381807045, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.2668575997365348, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5174669930427155, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.25289636204048427, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.55030569340461, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.2997527757973927, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5791864392203819, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.2583948662751404, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5105420923907518, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.1603931733528484, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.37904065806088205, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.4177104476436896, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7086018074577748, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.22647299841062532, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5653420376166065, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.08677705107533369, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.19558575283017382, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.34749088141991274, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6197290698554747, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.1034094067304739, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.24170749955870371, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.20580697296080874, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5235721554417833, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2247327109713433, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5250140675378029, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3378883984281531, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.6049772225333672, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2761659300730445, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5565926641426052, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.34830115722228644, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5888195275254285, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.36581287441608196, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5826367630755845, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.24624087743244766, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5070519477529656, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.38749284922692695, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.6516567627893857, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2274466311435254, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.48138629187483895, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.23709755163544347, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.4710922506827035, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3298078055627824, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.553504716597481, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.006420462868175973, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.06924548146482756, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.22571464820591175, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5194003326020868, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.13384453331197527, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4151425963129396, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.2276261087372084, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5006338961901005, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.20109176688134525, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5208655725098277, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.2473562600048627, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5076737995930731, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.19726472415983368, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5265681085035203, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.05469182036071644, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.14134611705275643, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.29165949127069796, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6416400462287064, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.1822418298126852, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.47651743090462295, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.28074631028535585, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6108353273226604, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.10369816700638204, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.22867526454708295, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.1288644336763944, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4506387870225218, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.3003653956261136, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5819235916814075, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.34009641866679796, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.6387903483458015, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.28509173779340485, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5531716447251654, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.32701836499065495, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5887530871636928, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.3215446470225238, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.6044362626458115, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1621452786919851, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4266548530258764, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.39305243686884617, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.6442143025528515, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.21388271661657618, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5554687912903771, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.449156109308219, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.6968694846040075, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.30587770809762665, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.564673691651119, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.14651860136741404, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.26874220962782625, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.3014199920541698, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.37258990587027996, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.2169400845409205, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.352650085718584, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.1539430723824455, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.26414511109897976, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.12816377064174464, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2946511250628232, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.14925845270770738, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.29215719061835377, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.2819443057280203, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.41453825421149665, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.22153003594990717, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3828811613558722, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.23313203626084417, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3985003392586837, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.14651860136741404, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.26874220962782625, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.14651860136741404, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.26874220962782625, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.280867833557141, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.47682234542802715, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.3195214890612964, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.6238377764870237, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.2026004770366011, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4124307729296919, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.13123243740715776, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.42907981810256635, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.36127411811309323, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.6437771669410485, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.022435099089467586, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.0629963528501866, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.5992890926074543, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.8208010526832126, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.27856409500832724, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.5620060117576954, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.2026004770366011, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.3493390600432761, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.4216050739426583, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.6298635247340831, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.06581314846603122, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.175300149784418, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.32397368477662136, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.3479857106948536, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.5434098077482219, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.127094130129695, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.37183060884198066, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.1616475408517619, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2842078929375233, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.1616475408517619, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2876758205224393, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.16055739172356015, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.38788396342269943, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.08278476463888747, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2722654377129244, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.438372977246803, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.6331196317735631, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.29592551586707205, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.4766320516060901, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.1964351708221123, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3340673420939409, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.06861077177982006, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2923752795770079, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.13914825184539845, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.28558864930678796, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.14161200935137283, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.28747434511519493, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.22839293770911745, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.4657355446143013, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.1709913567536511, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.29449196775233905, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.13435637642994447, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.34277719024611025, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.2164949874511416, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.37103544427871854, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.2177197358110709, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3769919943574011, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.1873975519857385, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.29483678830580506, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.2983904559856352, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.5687663930843418, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.19109543352736386, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.4581747073637954, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.010133180370259504, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.10549258015880895, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0820092565332346, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.37421819446145144, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.1791552038222414, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.2774929236779122, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.09521360830382836, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.28777216726322846, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.19606965736186524, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5029030066686957, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.19598322445625943, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.46665751191230503, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.21170876705481304, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5213888058464138, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.24268972717185816, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5576510639586775, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.25500119387217685, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5473994521063271, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3536429640513799, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.3148511129075859, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.6156857190680921, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.11884101417355644, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.47174335977774734, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.2828696562913071, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5815271722252192, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.01051238006063824, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.17812705640334517, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5084809568961394, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.2104347389999275, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.41820208790180724, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.33893526679717595, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5305909471293387, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.4005296397635166, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5780131186067837, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.2979260336350717, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5411460905234677, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.19228093786407296, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.4026187336796658, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.2473562600048627, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3874241899308999, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.5043660369058458, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.7114685753038728, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.2501973138123507, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.4364929917146681, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.3520774812078196, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.607239668980014, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.16467029855845897, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3377501966816411, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.23419233116745658, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.46742045504786317, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2534684260065973, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.465022490109088, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.28341626687166926, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.4981912496496188, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.18787234368655517, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.43638553308108674, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.07967137083817866, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.2817820058246867, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.09044734140134039, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.318967366104251, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.005135910808249442, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.03203003524868126, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.31861382388380677, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.521868055391611, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2002683261487131, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3934832974774716, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.056164803850745015, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3268244770409936, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.28648312939078924, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.08098869931579498, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.2826687235563463, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.19035778476657214, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3275220698724237, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.2139885278593109, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.5273959990464491, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.16558141211628247, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.37532912975144084, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.06356475632281808, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3009759906152119, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.14227980900528805, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3453010483553648, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.15525616702869105, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.31389802556639906, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.2788221557440545, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.6008304689237989, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.16846216892989907, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3801074056305939, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.1893886712648118, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.38509762225132554, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.005709385735849358, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.03522717964962578, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.18236198178601878, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.33077619366118716, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.11012419619306524, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4937984099457621, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.14599223028360678, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4828499846637324, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.12579787892324615, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.46133126472684716, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.13043787072509858, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.463417247785631, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.1235463341630649, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4614840691320092, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.055560319570139106, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3060391771300899, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.2744333720270393, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.6189610313789276, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.10547213336960157, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4536459955429135, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.13727159408550668, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5081399318838673, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.06449817351457392, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3424747506665667, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.18837227000249876, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4972510079060122, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.1281637706417447, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.22833109825855033, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.25500119387217685, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.42217126960650364, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.29264105234089743, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.36844739850003594, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.15829860807960125, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.28885600341606654, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.1309822409175701, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.24657461387159663, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.03320059139020178, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.07438646667723649, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1125619471249954, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.20521339316349824, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.31921572308551066, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.05173267766588821, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2600178198215753, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.125405948089153, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2272526814684417, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.1397775515541073, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2591367637695346, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.38249626297768063, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.40976234193505356, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7281051247089317, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7882997401328445, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5806197937310393, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7346706700987636, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5793367580502561, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6502428441722727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4855332614117322, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5299556742893647, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.369345079296433, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5103516764863386, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5357110024227318, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6365941772753647, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6960917409740967, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8209757784637755, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14790264259417688, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.27159767590045303, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4751132438608344, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6849386986272349, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.08635800047213174, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.218109371254876, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25552199116069907, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3799133205289109, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23386786214190372, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3682311523733465, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11739521786077453, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22090491782919655, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.280413108453108, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.139800134566647, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2510112235832054, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.40214612768560637, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.45128424593135114, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0925329498915617, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2110486160692096, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2887308472548599, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41654484827391225, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.12453389344594705, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.141543757252386, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2594145364221844, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5642761727828352, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6181373706707737, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.4093301993048525, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.512762518189388, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6244631487487835, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6931369519059803, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.581972638479957, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6970914528585833, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.44120063733294235, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5296624608564717, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.4440750605884706, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5402588602256685, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6244631487487835, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6931369519059803, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6458552885189878, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7468283944111381, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.4272870063962341, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5170917334956868, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.43310177167002284, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.534533410927948, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.26481979271706185, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3212854967972961, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.47171327621770304, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.2615858282579583, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.35447530946908884, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.4577275269488853, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6747054474171109, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.25383339228798274, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.45896379476820603, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.3508739523842563, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5533976153694653, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.23705266435224473, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.44716007458096513, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.25530635525095574, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.4224404198283467, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.37762270401758113, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5372679696382219, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.44392090655418587, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5678926447384061, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.40891568776497583, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.46522329223142805, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.11436433361427001, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.23221971735799607, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.18580985894574314, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3347249292100999, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.15138514598766048, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3237497764315872, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.27668736912821895, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4414406760568898, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.17200767571780612, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3723150838362789, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2465659486053858, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5689069160047179, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.30391153783979835, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.1544458227548897, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3343587266874694, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.3627848276110141, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5349346532576155, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.23817261442630488, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.448286611717823, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.20679845323803403, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.47636494608150104, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.09147827112247602, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3258762519783793, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.15604242268653643, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.2255928425212252, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4751132438608344, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6159319815107203, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1477219991186121, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.28685201698226354, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1477219991186121, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.2391308148553106, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.18180608220159192, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.27307753334479423, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.14965975078050625, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.22213502776474325, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4093301993048525, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5021029088235913, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.3160946016179871, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.407876439044591, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.025108530586642898, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.17466240109087192, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.2719194508460068, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.12316365460790003, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.2615858282579583, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.35862918415512257, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.24728515687112834, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.3088155734423375, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.7281051247089317, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.78479833664205, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3254455687469726, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4474512036484817, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.6230832293767097, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.702540870003671, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3267294026204632, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4510525482602028, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.27718461611705486, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.5365920629514802, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6274039030337838, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4578226095312774, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5406295999835291, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.002054231717337716, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.12286996020967837, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.31567668741706395, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.353203510510529, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4910213297498164, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4815092081725061, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5820265218174012, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.23887527917609022, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4120359948636439, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.36210097004176117, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.408098151133905, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3165014630070639, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.42516173623967946, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.15820362165931962, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.2249046365436241, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6874078611406401, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4753167451887016, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6372909532389948, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.38317923930200504, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.47975624978837655, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.2054194471318506, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.25678404806291744, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.37045149029437513, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.23660362391696813, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.34152697838249696, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7246473808162345, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.728208634600343, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.5793367580502561, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6502428441722727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1243018504102695, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.32950116238735283, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.369345079296433, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5103516764863386, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.5357110024227318, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6365941772753647, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.36763082847636347, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.45637140510576385, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.16935976352352106, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.31268514922728713, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.41990725085948355, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.565361573648609, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.7346706700987636, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.8363600587440573, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9912737182609732, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.6018154975998465, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.7669980679050217, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.34589895849033114, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.44792042673107413, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.47320724783393625, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5833006006517599, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3556521383601747, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.594830811413066, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.5406964703993759, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5964595329953364, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2575863752355164, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3717184743596148, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.08197539732074254, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.2552663483401067, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.43994654743790196, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5758276578902723, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3730786950813075, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.47401660085208147, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.38223593598574, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5729676575997464, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.19328966457045355, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.20477156411200437, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3371728179865314, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.19984607356962125, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.29326031481052006, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.08939270118279458, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.2952752522340665, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21629114799587432, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3542320138389837, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21993356630819796, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3822901360655399, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.12212865548711085, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.27604929504751197, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.08939270118279458, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.2952752522340665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.12072692160188762, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3162144633910661, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.17095864413061523, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.2805749649536233, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.13952118378975725, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.2962794525145751, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.028735632183908046, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.17670087745185423, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3136010782144669, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.27274191069381915, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.37436438971100644, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7246473808162345, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.728208634600343, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5793367580502561, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6502428441722727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.17662903260733673, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.359573626731952, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.526589137558171, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5667866238125795, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.6244631487487835, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7155411017347171, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.6960917409740967, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.8209757784637755, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.019516573752972968, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.22419056820298167, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.3577306040313533, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2434330428491034, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.31858900384957733, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.583526016818016, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6994652193905146, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.27405612859390877, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4639958592456083, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4390960897971484, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.541742178821102, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.13232291594986312, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.301901669683193, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.15084825228964133, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3284886849880412, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4452652851854937, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5889782977654896, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2677353447271197, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3932141708916282, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.1536690667279411, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.23373462830676886, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.16800102974369996, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3319781987745275, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.24007528246707907, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.31084467045503017, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.6052987576779449, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.643602170728296, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.13004800471424346, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.28217142159025543, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3924259174695316, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.45050557152077386, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.10601317434781207, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.2344095627038401, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.12769027061800275, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.21844360831325868, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.22523697594538705, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.387672788880256, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.23705266435224473, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3838188339168412, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1852972751417938, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.36660412101424933, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.2653698485201136, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3674668904964848, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.40975628086142124, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.4815092081725061, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5785251190053333, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.37821486365532614, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4718665834023439, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.35423985843000033, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4293667924436175, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2933705789311311, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.35570110758127277, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3931807596037881, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.41587358041151196, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.6230832293767097, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.702540870003671, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.17679588126795498, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4098986063548376, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3675058901988579, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.445171638403697, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3196352513221046, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.29107087297820256, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.39517833279310743, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3763743474188506, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4120099199050514, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.38694317759010316, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.45827711860455167, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3699382260470039, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4032851361478274, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.42378190548671596, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.47335507275218824, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3763278728427448, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.39009457811977266, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.34791594751284466, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.40864368085475805, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.45167594566243024, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5169677927619225, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4056782022243561, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5237454577692897, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.409211292187266, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.44289452305459603, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.139781837804502, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.2134385691462796, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.296993231533869, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.23270804908165135, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3478589640284733, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.4115167991342047, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5649900101054287, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.45167594566243024, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5169677927619225, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2706805630983137, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.38186806613291924, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.1890425467840326, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3513019690066663, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.11234905986715489, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.15225251521949978, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.45167594566243024, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5135242648129007, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.13733894353973466, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.26356016327430454, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.34609083438127625, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.15878174295086994, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3163237075880393, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.014379215591354156, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.12943648490176665, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.29942831535046555, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.22453002699007485, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3354597455808525, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.24489516889906388, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.409369762090413, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.23386786214190372, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3780009826926042, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.26696378876165927, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3581548569027847, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1262744724314408, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.25266688865379994, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.12061450720815534, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.28227862122593256, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.26518122980477765, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.514846875413034, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.05176181222975041, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2082376263771737, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3665582908776792, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.010917030567685585, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.22316698150381944, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.18341524527739528, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3989952325675248, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3925121365052661, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.47788592802001717, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.35423985843000033, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4401068255722377, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.19263684669277223, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3020103087706165, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.36625663694634303, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.4772894233335957, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6272300080155604, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.2523841428380167, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.11146727460890443, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.23969027175152666, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.1123102665458642, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.17729842264695017, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.32305168353427943, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.12789533377801793, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2283763803651714, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.15896519992112562, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.29513999460654694, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.1423412184218882, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2596718628394258, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.17615962296513688, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.24431474547499252, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.13502367316243039, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2597905925100196, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.28254292734082, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.408604647745239, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.16279348731624776, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3149639290246331, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.28200049157537727, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.40682121687319617, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.20069190971412876, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.12409597120849801, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.19449255248446348, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3571150500823898, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.44642876819396304, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3571150500823898, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5018967494794737, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3572188192648703, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.45381175288762937, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3572188192648703, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.42937064888927773, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.2513073726775429, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3798674638470122, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.27274191069381915, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.39756349135906077, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.27289712011595696, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3712305452784761, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.13232291594986312, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3103406767609728, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.08673245635389941, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.25266660249677875, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.2295748846661433, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3277581848986239, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.11902001907030836, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.2714975491916872, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.19040700845445938, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.354176495487078, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.08968235248346597, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.2175311081388801, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.10682827247639556, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.21551117313912851, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.07425055521504613, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.18122341046764998, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.16352670859125373, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.26406333983187025, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.14974959199825547, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.10640850690356463, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.152668380659781, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.1022875701616399, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.21585478443422898, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.09812163258584553, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.23909785111906673, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.18802425548665458, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.10401577613691954, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.16184347717072042, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.006827911047017742, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.2164910348876327, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1472462377094902, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.30525310195831357, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.2774527633525211, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.4358323759361012, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1978585723043446, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3527599187160617, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3021375397356768, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.460354013765958, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.12991916506579942, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.27889392608860697, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1959280139287724, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.36708131749832384, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.15663915411954296, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.30522234950263477, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3166144686275811, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.39315143982598805, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3761648431086742, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5466736282576272, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.26007925058007886, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.31314224813827346, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3932583887521134, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.5199302229930708, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6017481019884499, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2523019529343173, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4406369072888057, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4195959935514934, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5643628666004862, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.35974578964005544, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5294218015563622, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3615855225145535, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4838257582776513, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4855332614117322, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5860317733541353, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4671778989333441, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5801543735794272, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4501609222100726, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5779114321769039, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.06170484898727104, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.27199778234956107, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4129085501138227, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.24728515687112834, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.31221693968406194, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.19230188007838597, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3407021378942239, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.41072675483179805, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5635589150380774, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.4122974402951816, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.49812945858788304, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.09812163258584553, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2826014149329834, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.16954555291875742, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.31800856700824265, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.41489895705463226, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5351666707169298, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2505523539251516, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3831975160557709, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.03616809285846403, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.19004145843928574, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3699750032227405, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.12045422179467957, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.22184013352319704, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.6612342258381259, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3883375900135818, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4643731845106876, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.11556647985416685, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.23748323325414847, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.2589451141492935, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.36265302427150575, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.21223633441554032, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.31520576641768766, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.45167594566243024, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4937637674644026, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1532685994792829, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.2563896884841204, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.25957530464210776, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.18223058800557917, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.33062429129755794, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4887128900317842, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.6064630666233242, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6712747226800536, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7123666275414222, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6560788161152474, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.25841450487427714, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.44494784218322847, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.18279744869146425, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4024804882630303, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.4245970617334277, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5694561181202089, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3360020594873999, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4979723869498355, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.2676032275663791, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.40689458580141896, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.22319449652482443, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.233078149078302, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.41167690646865734, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.13536681105774234, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.23595972523293418, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.14728954312449322, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.2256490809237466, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.37300331821940047, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.24883108274644028, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3329995141432608, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.13827175716697776, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.25880112791050663, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.11856660123276004, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.21303170584506914, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.24017995835117018, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3874922193559855, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.26505211456170086, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3924854246504529, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.1601125708485386, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.23752778281494039, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.033908919446183204, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.15843589640881697, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.34791594751284466, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4062384532979022, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.7795149903947967, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.8912732146280626, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.6230832293767097, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.6994652193905146, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.36763082847636347, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4293667924436175, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.43728553857900826, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.43806460289276367, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.41546060026113085, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4663335536810786, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.6230832293767097, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.6994652193905146, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.4578226095312774, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5429809487027987, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.4390960897971484, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5164819146651056, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.1600733593956426, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.2350173429055301, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.27182269429130823, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.3135936384682831, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.33737554588923646, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.369875665962309, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.30389058699653954, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4310896909809194, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.3763278728427448, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4261526683335186, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.36210097004176117, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4423339372603474, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.3763278728427448, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4047854120649662, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.3780488661667278, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.38749538363085073, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.4246183605185108, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.501948957312799, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.4246183605185108, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.501948957312799, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.32001589569502475, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.41050505670086324, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.21163700429456012, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.2754475815887392, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.25437706194445847, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.28858721839661267, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.3924259174695316, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.4246539836622663, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.39022736644855677, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5202587069271436, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.3672404084841361, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.4518744271362045, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.34332704063408953, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.408639131209588, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.3672404084841361, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.43288682804187184, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.3120848453730729, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3705328519354151, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.4185938787651429, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.49815219590152665, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.18059154473936082, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3333808784117231, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.27334594211112967, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.35690726265980793, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.31277600813200596, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3423771857129358, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.25944320225692963, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3049825437064534, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.21027545940631823, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4572185175571455, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.25383339228798274, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.46123461430035645, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.16401798649868696, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.3760928911869727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.31756348590173983, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.612638372401986, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.1535259783865636, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.35449697447586703, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.14113991930789777, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.33851813032056655, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.288452820117579, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.48305135114165254, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.25383339228798274, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.46285845798510755, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.307137308263447, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6108807661013372, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.104552581027927, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.22766480821275292, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.2140405179077785, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.11414633188690328, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.2503197875391322, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.17807129401511626, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.07843772989359644, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.2669076052967215, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.12913533075470382, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.25219595014343127, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.108043996762779, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.24726477214045167, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.07418182487612639, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.26363958328353637, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.24883108274644028, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.2911305725820655, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.1022763758993479, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.2461976716440084, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.1136025775201101, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.2506114986319833, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.11092770141728163, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.24752626772001793, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.1179949261549654, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.108043996762779, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.2560670416470495, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.18105048502088059, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.271054087912132, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.3029928206533524, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.45262153733641225, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.43000007605628365, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5073076708050636, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.4125656013737127, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4621663225660634, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.3512502252598613, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.43869157736529, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.10991514729498916, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.2549321593272589, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.409211292187266, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5745300552045192, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.3471636178393148, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4191180487309204, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.17493830569974406, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.26690261630673184, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.13110052718009899, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.25678404806291744, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.35179752925748303, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.16767849550785174, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.32211607665330505, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.3068038152276031, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.17278760032513005, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.3872014058095359, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.15663915411954296, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.3014802378681773, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.11117895489854909, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.2820967744384662, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.2305923243495954, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.3552281813814547, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.4760811876074703, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.34371432547871084, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.5618168006545909, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.1636336323736715, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.31453116716344526, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.11908088655595703, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.1504843536148922, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.2872819820921106, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.1032637856654291, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.12256115024243901, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.1667153530685962, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.2563322947493884, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.18882437844970767, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.2977239406428574, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.11676324068066542, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.17463781885740615, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.14516097783347692, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.11548431380162248, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.20558096287614028, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.1022875701616399, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.20801464611670326, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.08036517486170719, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.13972284796504708, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.002670940170940171, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.07993498817966901, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.17694915875422723, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.20162477784805663, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4096636937616398, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.18653722013077995, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.1075467277016126, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.19568663135308928, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.20001812735957777, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.10356012289702854, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.31649183953796767, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4627135773217405, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.2387391897386676, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.14127146367040996, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.21761435882075786, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.1371326695219561, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.21582567264281033, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.16856369030953883, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.17222588358752802, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2079623070209971, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.17459825687868188, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.13556459893693668, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2298838546025776, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.17271632813084475, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.15002157377943118, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.20909575217027426, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.1710093902521564, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.17620697694486812, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3349127944613063, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.49806572776935465, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.643432991222625, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.40515809323728763, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.5449200667389397, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.47992366044240764, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.6339648696206408, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.33106709338898954, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.47018261317709287, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.11758806799906993, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.2301535009751364, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.3757154576765614, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.5486864756832853, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.3757154576765614, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.5486864756832853, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.20313943660946668, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.37106764025714706, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.19761584919202016, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.17042178650486198, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3381936694419076, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.15285713412458543, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.24746988080227628, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.22777548520508317, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.10845182904139573, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.27825798566245524, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.21143896166926268, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.09264863757089695, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.28521768773900896, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.09654156210312353, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.28937694627449495, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.15858053013682752, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.2610745423686279, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.14153945350617025, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.2844284823024208, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.13547277341758465, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.2717018577858807, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.15542859771226727, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.24568580024277256, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.14132052098159442, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.28750248252681143, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.21004850229269248, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3447734406041285, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.22743363869750483, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4248437050568334, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.29579846078012384, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5170279422640637, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.33018775735516415, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4302626210819361, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.17034799410713272, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.33202264871471165, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.10389889377420038, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.251017701718585, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.4001601601922499, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5775093624441812, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.34426631072695274, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.460136778351592, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.24727997687684689, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.45638354728732916, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.16500804259372737, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.1411791520898124, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.2982093797567791, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.2512712267295304, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2928484110896528, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.20723903671796345, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2746682387996949, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.3626305461419687, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.36369161190356464, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.20015392096614926, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.31608746169943613, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.1333497993210919, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2934780949245599, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.36332807605324846, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.36862616956526695, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.2512712267295304, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2905232453852545, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.2556428001399479, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3140105990938112, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.2590924722191636, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3107725858839102, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.16634906495439625, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.24471852734537655, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.05742930892025214, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.26702493151987683, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.32737126018440355, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2539593860148789, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.31852925024650675, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.22650664914012167, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.44021101913103755, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.3039519244807058, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5267184242152971, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.42843223509772244, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.6251003871955769, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.10511846841633776, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.3196991085171128, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.21472986052554088, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4245082395812861, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.3493450883427864, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4989784300945961, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.34448756871464103, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5072773512435553, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2563499792328283, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.36968859129850784, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.15071604670320132, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.17278760032513005, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.3286210367348145, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.1416851070122953, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.23506726943391335, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.16170884319006984, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.21768601999695544, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.11414803586430372, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.1340366040130036, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.1768181514874441, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.1610171131700526, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.10844052180809807, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.17299030784008884, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.005676804759572728, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.16738626209234725, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.2250265947708922, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.29110362374826554, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.30776997671170997, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.5302815626138546, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.355402294764931, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4505070984023486, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.11739521786077453, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.30286447545869927, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.23819486101149287, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.2942276102068942, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.21738402658823416, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.34769229172545757, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.2910873658777246, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4244075490204617, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.26788770505940324, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.40022938036917965, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.24669426816409512, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.37864393709540933, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.017203321134774727, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.24138580957738648, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.1411791520898124, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2902470444985328, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.2171788734284664, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.33899728453126426, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.1050176352370787, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.3096045806359874, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.14635786490292246, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2972359344684186, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.09881511482966875, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2992587662174414, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.21997327371895098, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.34239865333501235, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.14458350226851174, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.291471427554394, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2187139093978746, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.21375267056360892, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.14458350226851174, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.291471427554394, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.16292532792713388, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.09881511482966875, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.3096045806359874, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.35423985843000033, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.449850771779881, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.5572806310452209, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.8076383886663636, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.40214612768560637, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5573169779174251, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.6230832293767097, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.702540870003671, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.2433923519922121, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.4163168327571102, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.08669514820036678, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.2499545634224168, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.44875232315488756, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5111093822962448, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.3337430862067958, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.4389882441117941, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.2770840545468946, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.41497301664586334, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.16291153978891829, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.2578557675410513, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.3636972258269605, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.4815092081725061, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.46880886343224853, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.2782546336574456, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.42327771190588576, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.39109158855739823, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.48247427218434713, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.37821486365532614, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.479249956424894, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.29580528518835375, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.44985936994013187, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.34783139668510527, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.44014965506542114, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.434975077577922, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.47044769122745783, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.3340174053783229, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.38192575633810405, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.4070491655025483, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.42034007595588657, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.20783608924923047, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.30250701195783797, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3647049156958342, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.3931807596037881, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3863308383621456, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.4185938787651429, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.46641278921549706, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.4185938787651429, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.46229900712285454, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.3917196589390866, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4177131012054097, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.40842567407749947, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.40069931216675575, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.3931807596037881, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.38977878583131187, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.3909779796089584, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4552666486837008, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.39022736644855677, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.469958902604639, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.36343365059404575, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4025730604768124, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.20237126199349462, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.2589451141492935, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.28851079284233655, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0842816865856957, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2485953030231616, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.21057401113505914, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.08678345215657841, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2461700872648841, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0847400567745194, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.24521641593951496, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.08892786873926027, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2461700872648841, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.20652562031481567, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.11755743200908036, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.31066419642253373, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.24865388866098986, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.01634489871441457, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.08581896038434547, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.06337708222967141, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.19580581055921326, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.09604819623436828, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.20999393339350345, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.15713284450781043, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2936101635241035, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.26837663158331726, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.36155324846955933, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7246473808162345, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.3525678415060714, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.543942790381318, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.6676191940689508, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.8174079050545816, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.1671164029594426, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.43350115970032554, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.14054613281857953, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.31166159977702695, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.39814417587130846, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.6449325609322236, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.19882981891203355, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.39881128802168136, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.1569912440358649, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.233058107264253, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.22840134993478534, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.13733894353973466, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.2628097872401104, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.3925121365052661, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.47722987146488, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.21620508650490267, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3140933570370138, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.15911783110981517, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.2929026229365629, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.11371681934875245, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.1736078338284175, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.42988105429544615, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5765642529796587, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.17966302274629617, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.34343300645922903, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.10754999784156925, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.22467133896316752, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.29042767355630905, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3820449807327681, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.2494483948151807, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.2826410419631889, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.33386474883709644, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.3352430929236216, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.37654805257811624, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.37284875432797243, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.46330437039257283, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.14858713442145016, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.26570814480513566, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.3917196589390866, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4937847892557821, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.3203759015876555, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.36881913952874323, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.3595597536132021, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.45538141531674614, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.7123666275414222, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.45167594566243024, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5423007037746893, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.4332008456263299, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5055542871350683, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.2001670147593021, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.27873308142715214, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.3198803457323068, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.36570299791317035, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.2061927630692647, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.28624131346749065, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.47046185035490873, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.6292601141059937, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.25983833013159885, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3730064448362738, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.21849712035317273, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3365794075261624, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.11902001907030836, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.2615385026948039, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.13733894353973466, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.22471476580349214, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.41546060026113085, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5135071749227893, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.48195116293616075, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.629169893832561, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.14643937864373885, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.25912071818960414, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.15110567441923345, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.260101900069982, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.027170554671717636, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.16436148154531297, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3023015988751768, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0926947735155968, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.27152680319064787, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.22886565624051966, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.09769805815777928, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.2758478148913772, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.31461500509304885, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.4788526616838816, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.08047798253159327, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3229186657362169, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.20241776365171346, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.18661962390369358, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.39133469696528195, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.2768820552331134, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.07229703035677075, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.2726328932076192, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.015600197098512953, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.11414633188690328, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.2637316604062493, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.15867077366552554, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2515767320274808, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.21300958856758825, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.10629625019345329, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2894452751925746, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.16368118043487423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2975281469582198, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.09992869870690119, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2643750418939446, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.19787017233255969, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.18167706997684474, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.3278493462869521, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.12076200198832092, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.30895143622174265, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.14823537448657864, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.26063968470685284, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.14710052131359536, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2539053433397206, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.16217631902430296, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.25122013507115176, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.31099162791754675, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5181212746323438, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5589229357546774, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.4093301993048525, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5295425318265925, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.6244631487487835, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.7155411017347171, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5388058943574727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.580739671272447, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.40214612768560637, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.4450640488450856, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5191178952716458, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.542900586633014, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.7123666275414222, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5365920629514802, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.6337747731257387, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5357110024227318, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.6141900265782613, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.15145444933520683, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.2826410419631889, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3732494235741262, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.21333164424828907, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.32785783444873706, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.4825755887985002, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5834117627541725, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.4427274357129559, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.48566583494323345, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.3215000448278979, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.408098151133905, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.11902001907030836, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.21131969135775222, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.13796620851017116, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.2991162156594185, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.48195116293616075, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5672887153097735, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.27560016787236363, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.44835630608859733, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.39880891978177824, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4801322208759939, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.17528697866511064, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.2164069950016749, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.3582972930343776, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.16312445849704404, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.17349790295171436, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.12765317762547787, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.20795234713221633, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.1274806088188179, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.2180851489906356, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.1587094725737821, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.1920594666485555, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.11739521786077453, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.22939440444042064, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.16236645871179417, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.12285228762352728, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.2541335227987007, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.11581430177230231, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.13310877322785153, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.3615855225145535, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.39302513361762836, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.5136268735913038, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.7004219512590859, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.5793367580502561, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.7183290415445132, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.3960970942970261, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.5421737755936655, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.2557308431384103, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.44020046133443264, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.22408914992675832, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.5440766840557734, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6486942806598895, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.47467913885027985, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6102089900133376, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.3773656939703193, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.5248351209923585, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.14757601755321914, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.35974578964005544, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6245567852541115, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.2677353447271197, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.33809821343395446, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.3414464563275225, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4182681167677125, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.43812558475283875, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.5153914304432097, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.3864572432237816, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.45154198433087883, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.2588173346314121, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.34966578410219157, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.2668730618874673, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3629773991680274, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.4374062018435253, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.5403839144614929, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.43098807781921006, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4998903441953072, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.4026617981948598, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.45961131726156146, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.200532240765861, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.2677353447271197, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3655280950277252, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.1022763758993479, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.26825052055805815, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.18808242155433705, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.3220587874741547, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.1022763758993479, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.2693375138315219, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.10203198615804732, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.26393210689508945, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.09868209342887882, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.26393210689508945, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.10508106635796587, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.26161040297988175, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.09477675811896721, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.23696160551015213, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.18245400323670688, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.12212865548711085, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.24707767933143832, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.10440864748411478, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.2671475795987059, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.08023149270718091, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.279740710493905, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.21330178332703942, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.4261888165527193, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.06534434987768793, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.27176767387111833, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.22739562220830448, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.40909318589710897, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.19682432817897016, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.21669634490560397, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.41348528734771456, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.5036806117103709, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.06708839685423082, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3186988593970526, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0686555146522301, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.279526430463802, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.18786971228693808, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.2134163469900347, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.16168270317308941, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.26228540738738376, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.305925215411119, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.3174603493865962, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.465550295868511, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.23266541684590059, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.43542584213311014, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.25767850065044406, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.10808631609223593, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.24232742873756352, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.4185938787651429, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.46181900132004605, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.12874330508144843, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.27005885018903275, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.08893868599300617, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.2614872038666333, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.09235276591518521, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.12577829595095136, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.2463514312047734, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.25430316746203985, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.31361769699186176, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.41072675483179805, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5635589150380774, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.4390960897971484, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.541742178821102, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.25430316746203985, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.3843264258161899, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.26832828828804234, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.4353421873417814, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.3090705808198716, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.46344802441586025, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.4761654595813381, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6369102574048467, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.18676394386940107, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.22435510126054356, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3514245731837287, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.534130899739072, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3090705808198716, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.4997989608278053, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3552281813814547, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5479990039688047, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2042128370387497, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.34010033215004876, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.21617263218447194, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.39665284207892343, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.27449463298776555, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.35663228170028305, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.4810110961578451, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3493450883427864, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5052914563530501, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3072646319682134, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5375887148749452, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.21468806635443977, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.16711337215358957, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.3104580254514014, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.264371505578968, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.3692663913160793, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.40358627497421223, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.15604242268653643, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.35440326623172935, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.2590924722191636, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.34962994893205634, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.20466509965242627, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.38543831682371826, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.1261312269838889, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.29273546001876816, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.3538829072573592, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4576280788857466, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.00625904491799232, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.042587313196008975, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.25944320225692963, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.36740468766073175, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.15888556418399724, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.25359546478986267, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.2442195895839763, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.33985143707913, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.13536681105774234, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.29163125383681, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.6052987576779449, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.6809283802101068, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.29580528518835375, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4152245863377912, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.29580528518835375, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4152245863377912, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.30128865413061245, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.3577177318823599, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.13796620851017113, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.2971316298359249, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.4772894233335957, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.623743257110184, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.3076805720186954, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5322299798274237, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.02967152553066799, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.4587032440161705, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5453529746009712, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.02629356643029618, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.2585740748062296, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.108043996762779, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2627337195947467, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.18085702029043885, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.338987683983403, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.12829843029207522, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2926652353247206, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.27948736250364437, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.10490894282140378, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2705485193181514, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.07861539293399739, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.29011304888643985, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.3044918933465557, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.4630596968724663, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.25686963328488466, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.12829843029207522, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2917663610295337, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.1956251535458609, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.11092770141728163, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.26818571204274316, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.16516473320936778, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.11146727460890443, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.29428893607214085, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.10640850690356463, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.2838000569859586, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.10866043914193523, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.23638081554717555, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.15650329606801927, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.22598986047875458, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.2159287855104448, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.35640441228857384, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.1607479526054687, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.20326207399228274, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.09954109933243607, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.154567666908047, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.12987293870549732, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.28422427146147505, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.14849103164051436, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.30883024781428503, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.1934884374107349, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.40687776179631713, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.26598871140611724, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.4502776851570257, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.07984434410510546, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2899871712096496, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.08096470168539781, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.29129043241292674, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.41165970065973745, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.5948213301983136, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.22270385608874566, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.10527834487585676, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.27391969966031443, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.07858086105703722, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.10795293558053044, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.27042845399029614, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.24344044484205296, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.36944883808094725, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.16436148154531297, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3129244553794762, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.3545649986147617, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.4749542277669906, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.2517043491414655, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.37468110318084064, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.2753358028987337, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.4077441674615824, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.2250265947708922, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.38424599049509484, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.29113216578145623, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.42679430664357537, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.2618449527244832, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3726299296957476, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.16467029855845897, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3263521700764589, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.17034799410713272, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.28984287829971883, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.2721328088738599, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3634848474258691, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.15326140415213751, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3238101789644524, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.18815571743190213, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.375522612679117, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.1543252261021413, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3572407559404224, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.19035778476657214, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.45948305812456913, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.2264618820011072, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.14893983010707912, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3389883503505506, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.20638636014941364, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.427660959032505, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.1572175759719851, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3450580718329653, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.32441734031006125, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.002360717658168083, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.2152691957249421, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.3352430929236216, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.41922948177882463, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.2666090188234886, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.4310539878732571, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.4004456831424889, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5299539464991493, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.3960970942970261, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5007789387798656, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.37484914926654, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.4826432834392733, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.4369942407063455, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.46555087927121475, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.4374126034082234, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.4706011613514873, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.30778741582971547, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.41913422458998256, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.36210097004176117, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.422634223525565, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.18207052811092134, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.2690092394312927, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.23715535229161464, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.35918995475524507, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.3398088489694245, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.4228308786458922, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2927926577346015, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.4165527532106081, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.27073362211548463, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3669195863456915, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2608721373229356, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3669195863456915, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.27073362211548463, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3426098433314766, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2310325762059593, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3818848271995484, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.3140084866979345, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.4201283238904398, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.27668736912821895, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3671983604767805, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2666090188234886, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3625204192727816, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.055888558015542704, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.24315949752483765, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.36212873179586813, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.10401577613691954, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.17463781885740615, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.27890809547716944, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.14068535649874328, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.21065537154817968, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.22291343499214064, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.29815406656323407, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.108043996762779, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.20592612474035338, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.09695871631707126, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.4185938787651429, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.43266448575617944, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.11234905986715489, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.16108949081819493, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.368275540257269, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.4344880644474143, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.02072968490878939, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.09772992164303729, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.17471046691781456, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.09207598308796072, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.24553846741883023, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.21070435913784732, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.1163540245408256, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.2115841789715117, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.19264094072473242, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3181645976891593, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.1328981075995534, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.28935468520358737, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.09518930981816905, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.19355453531805264, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.195647514979229, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.33945900257486206, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.11238170584522883, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.17264259150244354, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.12861981016228477, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.2805631135214855, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.12183427844024919, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.15494432466984584, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.218134321293328, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.35541240647259675, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.19105600040048565, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.41505761608077835, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.2064597158958983, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.36557785420213534, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.3120848453730729, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.3751126030933485, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.1964771343560535, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.36076279170952025, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.19147265798368787, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.3652160842746311, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.43644602255194453, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.4593205617863297, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.11729176379814876, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.34619114638131826, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.2064597158958983, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.3640697123638358, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.18207052811092134, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2771423653771131, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.2897029272018376, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.41353605973777596, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2246029757863831, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7825422900366437, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8503171627677965, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.37709297891717664, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6881502501430368, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5581982021478125, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.49546288984677567, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.43795381992037963, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5881561248602009, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.46670957224939175, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.37544324742239676, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.41602211217571683, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.33491517492026424, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.714838523727054, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.49713060327965375, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.440129802760994, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.16195570128532405, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.581645267684411, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.429292711066547, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5848202846227532, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40854152133685306, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.41213231348812146, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40435987083533204, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.34256683873776383, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.41477028165511615, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.4164061298971701, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.42052258014181687, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5709936728721758, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40435987083533204, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40562290854898025, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.33546955366063214, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40319099863003527, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.39618802899930716, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.39618802899930716, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.39858613265631837, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3818534926571001, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3644112480028862, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.393379300802006, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3958941272081701, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3892064098781075, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3066682918799934, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3010381621698183, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.28783297914763095, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3958941272081701, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5948724602646328, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5042211795038526, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4425973012069069, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.511876122662448, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.511876122662448, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5049375875723539, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.511876122662448, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5582360999449585, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.48375513642780327, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4915933923809756, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.41469341972645324, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.39451521279220947, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.27447938256311044, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.615291848344044, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5516607622642397, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.47160616105623426, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5256353512715748, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.3765697091436241, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.31573558123189943, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6989238098201116, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.22894156860669912, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.525025800664119, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6245952145297528, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.44995700110278536, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.2680165156355779, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5989264158576341, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4425650919372919, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.32594818888335836, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6263180162489238, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.33762297226992255, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4576529535952892, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5309982646782259, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4726395749383864, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.40052428191473877, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.39336600752225864, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5396070985289769, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4101715667811344, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.45834841871997833, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.31754227193241025, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3974726419025883, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2658483576665877, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6410540990527072, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.24601372576927547, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6374693500772332, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6151179643430991, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.24601372576927547, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.686947433675709, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.4746119151171374, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5639241776831634, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5639241776831634, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5281061979991509, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6668099404219522, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5554602680850725, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.41291750111233794, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.17181529671327242, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6053635787005981, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6562641136790542, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.46426595961938383, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.41238100267720657, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.41238100267720657, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4806367958084579, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5013632657267051, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5066311799500233, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4224991954993499, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5191362758854317, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5379068753129642, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3348758882377771, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4601349893675622, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5705717737418762, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.410846945789476, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.39909989628767284, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.39913709020460375, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.40443357144012176, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4121946181418776, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4455138569917551, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.42422145417131013, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.39909989628767284, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5606044053771457, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.40443357144012176, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4121946181418776, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3113878808075066, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6728506998168392, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3113878808075066, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6758978744760765, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.17181529671327242, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5293474685884572, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.23578316044531808, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5821373704411671, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.14528679532351443, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.514952316880994, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3128496839849598, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.4615425015629849, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7427658453867285, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6371798394308665, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.30941048637024005, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6423124418413864, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.2970314818988727, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3292499962917628, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4425973012069069, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4410492519530161, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4429196299668147, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4425973012069069, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.47465074831919213, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4425973012069069, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4384930065736907, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.47465074831919213, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4373156210032521, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.44830378475308, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2623399284064729, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4373156210032521, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3844263765000694, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3805770883173698, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.39818525322365445, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3844263765000694, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.39818525322365445, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5802683403568892, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.23693055763743093, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6474126202050918, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5802683403568892, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5802683403568892, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5521590062829653, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6131017059052001, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3113878808075066, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6728506998168392, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6131017059052001, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.35870004213153, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.697021248528644, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6131017059052001, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1423071532720465, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5673078468780355, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.19923405658137924, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6211036406023237, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.4637878319059324, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6919476196061328, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3446073377034663, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7621696379946562, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3237722713145643, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7426638026175545, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.37030468338190614, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7587397825317436, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.545311114945696, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6120635842558794, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5303624596095554, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7835371347721495, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5658596262915636, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.1457684614972261, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5701800421590155, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.1711057433668069, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.1531682455208201, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6009917293478183, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.13369377363079382, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.25947507140745757, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6659437947666702, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.49342175914364256, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4754189767029448, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.46847165370535515, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.2784899880299974, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6722683601585776, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5310543174340693, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.293597382795084, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6760199025405591, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.48825277132758194, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.27571859863660825, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.743408011301782, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4088276333455685, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4745035227847713, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.18842393723950338, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5854975500881314, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.195647514979229, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5725643788499303, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4352628824108997, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3620843366588185, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3550428472545064, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.327643461630417, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.20149416157064579, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5931139543094289, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3782180895945298, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.507081939944787, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.16319679661526076, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3474927845768493, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.22179945921983923, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6249971903914197, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5753050684342109, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5116862201536014, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.22131477988685871, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6188310784475567, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.49289897908980135, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.514391848002756, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5141209056236068, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5121650809135759, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6189674633089594, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6248961527161889, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3555531255203411, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5116862201536014, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.33494612818381275, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.520472515533923, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.33471616336068044, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3198143076622585, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.25944035160413503, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.38085857828188696, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4558620539723005, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.38637605442040596, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4054892447711709, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.172700810315234, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.33494612818381275, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.28977907494497107, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6663117339552681, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2961516536011624, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7355780986981637, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2865612242047131, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6433813179203622, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.26518122980477765, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6541173886447416, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.26220676436185975, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.646323175287155, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6245566175148537, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3113878808075066, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6728506998168392, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6245566175148537, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.44261439814445486, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6285229436299309, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.29254488484029956, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3040559696901293, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6658994383739726, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4276859054768592, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.43620605921972144, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3598792258309727, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3258323649865128, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3209186108619747, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.45100597619813854, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6245566175148537, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6245566175148537, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.42278121257828405, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5459722940273104, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.22011791783839232, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6212521406814923, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4813598669606701, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5343147728119615, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5125809225356253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5125809225356253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.511876122662448, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.47825370157575003, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.47825370157575003, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4935153102946312, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.47825370157575003, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.47825370157575003, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4646331830467803, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5063020142455625, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5963099883424426, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6245566175148537, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5539920925426138, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5539920925426138, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5633410521280906, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6265140753983048, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6245566175148537, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6265140753983048, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5963099883424426, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6265140753983048, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.27979820860022203, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6056889168578378, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2102369368326755, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5768887726639784, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5326397959358325, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5226572946586268, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5226572946586268, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5210387656594864, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.48085787079671877, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.48299229988531533, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2102369368326755, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5876589559170592, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5309753107573227, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5309753107573227, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4814496481274858, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.195647514979229, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5857714957546027, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4877445613866086, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5073395824633415, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5073395824633415, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5054194884603328, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.49233042976388086, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.6171472238624475, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5079994737492071, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5079994737492071, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5217233763439505, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4877445613866086, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.31122692088261866, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5362380779055197, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4148097947848928, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.33709347944719925, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.29382595610734974, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.31966312198190094, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3095023687399762, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.39257815659454015, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.33709347944719925, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4148097947848928, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4148097947848928, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.31805405607794895, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3195891679001926, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.23693055763743093, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7180407770761651, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3222538601891173, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7495871587703783, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5773664661124461, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1624355752882384, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6732778877516836, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3146660996956415, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.774919653861933, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.45984464012364756, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.23693055763743093, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6474126202050918, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6277082350099422, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3222538601891173, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7495871587703783, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.3005283910333271, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.2467789409989967, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.702397320872287, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3437729074300146, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.30577290788405437, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3053963874050995, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3025031428331747, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3038662664425978, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.2982854478221892, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.34100189594952895, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3421048582632637, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.31725906238707696, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3355517973989557, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3329758884511854, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3437729074300146, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.14962848372546667, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5531110803538978, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4660343508894544, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.43340932146378, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4169735477570989, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.43189251696918196, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.41934944341336317, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5242381673349054, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5725197387086665, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4218058951037998, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4817075905641231, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.2075521577117978, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.15980518115118317, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.6117406545411793, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.20038908500140973, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6177327642561014, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.21258844131063828, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6314891370223008, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.15821285888349254, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6605676082065987, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.5352059639541527, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4817621003925206, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.18710158230410626, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6827304460872186, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.5359775791756516, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.5220823443002603, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.18842393723950338, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.5761879138789098, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.20038908500140973, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6177327642561014, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.44210435496259043, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.3807134866446316, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6734021595321634, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5873831965245108, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5700887051433648, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5873831965245108, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.47099274965068205, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.41805694116981745, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3885961889310864, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5719181187428595, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5599655154601001, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5719181187428595, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.16432863675964413, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.558984948114654, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4133673303529474, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.48231853956144055, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.37314692804855976, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4120675260154046, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4669656953948632, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.43325089547434603, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4787742817228935, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.43325089547434603, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4368967740154242, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.43325089547434603, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.40580143204058805, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4787742817228935, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.17694975149532557, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.4902785344040517, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.5196343731603573, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.4254686256509745, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.3088290057043984, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.28985008910948157, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.19951581244033986, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.21972813874997157, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.43044062502463715, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.4103552603347404, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.21236579931503258, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.1196655750514248, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.43807296710349614, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.1790439216234942, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.30577290788405437, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.20793313992045814, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4271693186358773, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.44583799328544693, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.44562997179553193, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.394895635806623, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.24831335764102336, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4334317870334209, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4711738743510459, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3394357133920106, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.41527307940938124, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.012870012870012871, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3452137418984674, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.37030468338190614, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7587397825317436, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.29420957081163707, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.735955064899578, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7290399536251687, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.22894156860669912, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.6279102184928337, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7290399536251687, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.658571547163188, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.6568979068982934, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.22786788980326644, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7242044123301367, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.6568979068982934, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.6245952145297528, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.4047055149633949, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7290399536251687, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.40933226567881303, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5099813007320333, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.46778058365701697, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4410456674024549, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.40005353469571986, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4670473691722499, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5060729263955541, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5187697231766483, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.43447037786636045, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.177662262614737, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.40933226567881303, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.1433583753123658, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.49048115595910957, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.36660871058936323, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.43058619444646323, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.2155389842479503, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.2604481569923449, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.511876122662448, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.47698240079411425, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.45987051548888325, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.19164010393444778, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.2595747500221293, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.4698447771642698, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2891112498777974, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.20506702518574138, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.22436115527072104, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.18954169863009754, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.16544469489755873, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.514916507474197, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2624553065941702, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.18020185542463263, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.28803368842227195, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.14784675458566054, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.16593172081379223, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.15084092981447839, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.44419236241196947, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.43706010794795863, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3859454347966736, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.43016467114650775, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.382488360211396, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3994368992494938, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3806471506505897, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4322358438398362, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.39126796773446315, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4067122353485367, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.40969772112178865, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.4134635647455475, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3934230844821369, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.40972213900070625, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.22713927769854092, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.4122705622809137, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.4297822673018643, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.45934745896511686, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.41874354622000565, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.21998251147567982, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.45798388821076286, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3289300072190674, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.32413768919026276, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.34057065677205645, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.34057065677205645, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3345333252096335, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.32001325532974667, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3138244971309572, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3155522046261439, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.306554798549301, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.32265595231344285, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.30978899606166077, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.32476827258498703, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.21142141714303078, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.43493490557877573, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.28991415068332943, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2535985303909064, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2906936157823074, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.30032663294181017, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.4169329809261592, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.24141428403439927, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.4243475188011289, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.23943445872806784, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.2044800736021839, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.42460680673338275, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.27812527095899386, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2610708875934103, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.41597114236951854, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4350989271447826, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4297476286175239, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.42459015345630374, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4173469189995656, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4322857520745532, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4322857520745532, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4322857520745532, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.41231967817566284, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.41850408232257996, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.39529824050490364, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4322857520745532, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.6390929517394389, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4714767063337979, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5124045843781208, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5823727987141498, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.40525561144552713, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5052295296496148, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.65462584415807, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.6418812400466414, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.46854856592314836, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5129829767946291, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4317996505616924, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5991944525613995, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4506325776546161, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4754711104013267, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4775280122429458, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4930829209420314, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4070672647268937, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6406089576789361, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.65462584415807, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6406089576789361, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.41229295635175445, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4725994974466954, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.3527713788852087, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4506325776546161, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.18900427606312895, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2379375267482382, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.1935812904783315, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.15694956267772112, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.1519320263160399, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.1589166045294459, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.39028093982330686, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.24534473408944998, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.24125600379049897, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.17016732449779756, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.16493998136610505, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5406340703314851, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.2102369368326755, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5597860306970691, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.2853612459193062, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.32876879948340443, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.25426284054822956, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.17586616574792086, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.23693055763743093, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.6177791303755155, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.2777563958324541, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5168203343451147, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.1803079973226055, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.30824944361075485, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.18842393723950338, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.5854975500881314, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.15774545980684188, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.616730419953906, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.44478883235565975, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.5211859078984742, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.6038769794796149, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.16467029855845897, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.521517293959126, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.6568979068982934, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.469904565627719, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.21651956746181053, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.6330972445771483, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.37066963888928744, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.5290142170815124, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.1400373960430748, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.37309390213978083, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3718896131479321, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3101994394372559, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.36006710624510274, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.32694307847435544, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3601131188427998, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3225168425405649, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.31177283358766017, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.2350555071806251, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.34099102423958866, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.17534823156623092, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.4853402799234523, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.32889384774917263, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2497159712696397, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.17444346314545967, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.18266088622993074, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.21918702676823268, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.22637359354764466, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.6010070471156334, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.4715150407255315, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.1647463149973908, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.4104909902372063, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.20549680977299417, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.18262078594284786, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5335315304967084, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.42970160394394363, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4370528005274534, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.3649154493210098, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.3512855039522258, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.3871560819733963, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.42850086056861364, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.6423124418413864, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.34887639426636174, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5181731608711286, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.3328231949727341, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.40882161860042143, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.12716724199879337, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5690206807396397, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5038920264146319, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5395882240381418, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.42988803625606836, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5014180485937552, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5350011214551942, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.570777031854836, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.49292600165461214, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5046162213845391, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4210157243284825, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.49016646730730157, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.39818525322365445, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.554411787673542, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4146222850152668, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.44045974327436815, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.39680097980725987, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.43833368912949555, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4990914601523915, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4990914601523915, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4143896526938585, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5521515399820418, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.41400989596585114, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5088672498824739, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4194210013201768, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5277042186210706, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3827580433841417, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.46171989402470454, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4051297881219653, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3008659218202134, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.38636039023995106, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.36766978144243656, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.6002163368247662, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.2536205089423194, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3482169841214968, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.15388831190795366, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.24316286154385877, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.28130650893311265, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.1955267872750564, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3128973144973244, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.2317282154699006, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.41980622655065647, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.31748822774623225, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.16887665878255845, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.15774554820899148, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.26885090623067887, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.33867507760059357, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2996031251762614, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2065195596274271, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2091048289203972, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2137123752967766, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2146110419816635, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.36500123755057784, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.347144663116465, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.31527346354330105, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.15897677846800023, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.24497983229225412, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.13995623895459872, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.4674855890757815, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.47300840366486596, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.41241863293127407, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.47300840366486596, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3532706077955847, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.39742717581317527, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.41241863293127407, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.48109077206853695, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.44027072457059613, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3893385906010263, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6402657401259225, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6478447458847402, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5695248405921098, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6375973667745106, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6024181637771958, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6402657401259225, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6426687509864203, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6390929517394389, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6677526933120659, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6810253452803035, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.522423303475414, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.22786788980326644, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.7387739811627232, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.1329604040267493, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4471856677359072, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.452279977058944, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.5539920925426138, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.5521590062829653, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.19293080730252732, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.5539920925426138, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4052165625503468, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.46326814099366476, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.17568454593879543, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3791082020994395, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.49816993286090683, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.44863216660495664, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.3302903244452369, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.3313166446105706, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.2721408830033411, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.2035563167651274, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.44863216660495664, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.3445703908458862, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.3196493787756392, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.2298321888233454, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.3098123540103845, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.1342477978716863, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.42511022061090775, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4279977800006272, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3865378580315333, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.385169510617517, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3579949693968627, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.7825422900366437, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.8503171627677965, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.502684666455707, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.07149669287185864, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4657143140310299, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.39265131162383077, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.40281955727233343, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.17013461044703918, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.19400141696774292, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.1721293079939147, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.12863206872658067, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.1377576543423856, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.19591640810784544, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.1550261157185766, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.1609718736281563, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.17601889426326467, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.15614020477710228, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.16685494585396754, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.13064508924150248, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.4411812923020589, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.4179911123724079, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3630314170535937, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.39881946127463486, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.22173190464216275, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.16727155744441868, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.5352038615270772, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.42874234643884424, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.49449516991634934, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.18861932879019175, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.1979106679566376, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.22637359354764466, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6010070471156334, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5880855470290005, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5535439540882026, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.37334388072922814, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.49556192935701593, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.4912553801314764, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.65462584415807, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5679837784050215, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5873831965245108, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.2558970368401232, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5469398226382491, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.22894156860669912, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6299487983245466, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.15362208233245514, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6295157857600502, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.18842393723950338, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5854975500881314, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.18842393723950338, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5899792544547467, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.22894156860669912, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6299487983245466, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6568979068982934, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.5303624596095554, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7835371347721495, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6265140753983048, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6568979068982934, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.46014996368181593, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.22894156860669912, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6299487983245466, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3113878808075066, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.6728506998168392, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.6200828204097578, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.658571547163188, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.658571547163188, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.22894156860669912, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.6329467036048876, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.658571547163188, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3113878808075066, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.6728506998168392, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.6417603075499863, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.658571547163188, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.658571547163188, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2737928561916526, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5581978650607443, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3113878808075066, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.6728506998168392, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5800922255460801, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5766882097318834, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5800922255460801, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5807783428367905, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.558235742045378, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5731807188469008, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6277082350099422, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5744784106089311, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.599418106384017, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4285022577748209, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.45932416060974035, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4666156174173635, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.3893867836646916, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.38246468665452293, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.388047366459405, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4666156174173635, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.441761958013597, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.43608445006847185, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.43608445006847185, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4282343341370423, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5238865952545348, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.27828716886545535, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.43608445006847185, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.21732734812103588, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2673895048733062, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.28158744196562724, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.16486756282784554, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.20255581298259964, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.1686130658229696, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.37041873534869646, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2627375617772967, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.23558051670852123, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.14476982749981784, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.16632804710475912, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.13678452669387658, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.6382466300772751, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.1075467277016126, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.5012312009859288, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4603854172427722, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.34281202986923937, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.23213285024557784, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.5328062114240609, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.3993751732887897, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.5177301811811107, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.14189921489362475, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.15971500975156616, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.44355652237335036, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.40736387061175394, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.37489485923390314, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.47418667083462274, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2564816085214212, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2767281181183261, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.45023339690464936, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.4487353880719661, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.29472525951124, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.17075562096098212, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3093797352942689, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.21421851674109063, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.15753286601971267, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.21413630439620454, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.18294404750126714, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.18265664536277676, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.22528910215642392, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.43180333528957987, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.17635214465529284, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.18870691281979324, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.18957700837099745, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.20825973272491313, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.30354067465892703, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.4283147867664682, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.28583707879882797, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.2763272612915231, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.37329018470596154, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3522470517026368, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3800125473157396, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.4181721116054787, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.30059912918058257, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.43580099202398337, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.24528802733610966, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3820163726862325, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.4802701406922108, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.490032576569998, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.47018722626716275, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.40669095468248206, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3364967199973792, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3412793461743446, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.2722589423069702, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.6457595781467534, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.2939031491424918, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3330112821010062, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.16467029855845897, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.6061986709228673, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3106079785428179, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.39468447173978904, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.2460260310809598, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.17283382641366998, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.20022065596322774, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.20681826954034505, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.16647457450300468, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.1736099371373941, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.40074832458844545, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.30027816373416877, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3455509477754168, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.15923219431794336, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.19546100969052438, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.14697628025481496, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.24657376730321656, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.291098552209934, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.2514738930704131, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.2584734035489983, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.16655213460140914, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.41031802646691806, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3163845376082513, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.04945189800447702, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3285364659042227, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.19571523326731263, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.19055236832274566, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4263413781248326, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4359367610984378, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3562570068438905, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.35876705764201644, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.35593753263346334, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4429471433550604, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4225308643688333, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4438828875875134, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4438828875875134, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3074300439616791, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.36170303745193194, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.11383643766535269, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.16579761015459532, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.191825135463227, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.15889147720402258, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.17501431522455, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.18389425700362821, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.15961789994114992, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1802392930711122, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.23426035903837622, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1705216477556015, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1638149737231437, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.18391384242077483, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2887138086538547, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6342291345998248, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7013062757071812, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9303769449292738, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2381658499765768, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9063898435384111, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.9457416090031758, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9892952933418456, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4234885228074744, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7410180114887145, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5738396574789242, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.798357133373606, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4234885228074744, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7410180114887145, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7543919667018285, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.2828367156737383, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7829685247145245, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7829685247145245, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6626129614342791, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.8597893117683423, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.44543578807748957, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7513336773729535, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.4814564802258215, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7621649608882223, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.4625957988586645, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7341375356694393, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.2404315522172745, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.49155714102395526, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3477250470582593, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7188419868243952, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.40276720463657734, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6529271690805427, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6681898017773897, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6392900613840917, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.404727200247809, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6392900613840917, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.4386229919587297, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.44897710722021167, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6862249089515978, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5379348324975908, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7703766110349561, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.30188353873287377, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6086565367747951, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.22391522968021457, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6087618281135659, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2704091953828695, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6207272323003366, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4621757041594117, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.22067731046885494, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5635661737033422, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7829685247145245, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.6026286934891149, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.8025775976044891, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.6626129614342791, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.8597893117683423, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.6626129614342791, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.8597893117683423, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7689532399280165, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7773819133344605, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4625957988586645, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7338978299765546, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.24011079455637607, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.19920494035049138, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.614209720001149, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.5896613549548209, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7528914749586836, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.5300714512917181, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7461630750708693, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.33359103227594633, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.701102363286568, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.5300714512917181, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7461630750708693, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.5271017464925504, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7749613594649343, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4335364472118335, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6878319610579101, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.480771131185851, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7032048786770096, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.24706467963183681, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4801289744823913, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6766690087429765, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3272712268138726, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6272846474183881, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3272712268138726, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6272846474183881, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.30421485886156485, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.566236392445952, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.32965129549221617, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.623436907204599, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3231203125477008, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5812275690118908, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.32078739729528816, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5817366082116868, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3231203125477008, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5812275690118908, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3231203125477008, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5812275690118908, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.44332438338421004, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3231203125477008, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5812275690118908, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3665134361137304, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6118771029352303, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.28489318277723963, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5764325110247531, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2981792160679168, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5788026000794341, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3942058093215873, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5878575558111695, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2981792160679168, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5788026000794341, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3485799122645514, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6090575371936678, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3485799122645514, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6090575371936678, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3485799122645514, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6090575371936678, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.06088829927112382, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.4100134571476398, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5856608401367807, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.17098323692758396, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5216877937894046, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3527295712700594, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6062826429226292, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.2799331151961311, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.6471892368478446, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8142499721936278, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.7012294787544179, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8478115719875968, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.40202477345336673, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.7469480084357536, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.40157733283424196, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.7133166401137868, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.6912804407652906, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8416888527493164, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.4625957988586645, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.7494665344743727, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.42612283570374254, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.7185121839177114, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.17729842264695017, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5199388279318895, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.23141570376732995, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5938624587877649, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.23114663823833642, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5786592584609213, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.23713320246552005, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6106842970161642, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.21690365808279138, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5384773678665918, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.23114663823833642, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5786592584609213, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.22128776529156546, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5609439249510223, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.1998573974138024, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.540043957078071, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3282518529729176, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6453010665294326, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.30752616970214336, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6051452460471443, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.310441435588881, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6413164971104282, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.23114663823833642, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5814841210741494, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.22656720908801994, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5465750236858569, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3060368950930089, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6736142284622013, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.6888365053466561, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.8656273480576243, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.25711386542134795, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6088853751738869, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.25711386542134795, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6088853751738869, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3416581331218724, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6578570934289981, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3423591961656694, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6570214418399444, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.6888365053466561, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.8656273480576243, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.629934465484704, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3060368950930089, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6736142284622013, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.46965980060137014, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.629934465484704, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.45307778036928104, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6935397252637394, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5069487414732323, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7801245319017357, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5695988432761473, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7516103467926585, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.6358974376699329, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.736661937085844, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5695988432761473, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7516103467926585, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5695988432761473, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7516103467926585, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.6912804407652906, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.8416888527493164, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.45307778036928104, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6935397252637394, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.30614023358320086, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5870676308171808, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2281399713503153, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6211104268881504, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7196315267102845, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8835331636515565, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7196315267102845, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8835331636515565, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7196315267102845, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8835331636515565, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7667541011433795, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7196315267102845, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8835331636515565, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.480771131185851, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.705252762035012, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.445107576642247, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6955301378913092, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.30752616970214336, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5976254557718147, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.40157733283424196, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6532350818978572, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.38091370416670794, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6438225861756911, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.31374450602681464, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6422405832556486, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.4924584878270648, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7062510642584722, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.36227557436010244, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6470050797908481, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.6383964846132485, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.8155153170229187, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.419468515826214, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6664000694648706, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.010321080079207262, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.06492787287290114, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.4938015541936678, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7820348786317745, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.12858902882463452, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.35477908164501704, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3942058093215873, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6316031412228033, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7046532915279582, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7202697992734389, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7202697992734389, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.1943759862788499, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.49688103957939267, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.798357133373606, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7046532915279582, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7046532915279582, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7046532915279582, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.11970700565377682, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.23357697166633196, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5582260842665357, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.24363783193706642, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3903594390682207, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6662116837137958, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4727805712999679, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7717158158167359, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.36816017035411847, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6630063658071765, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4335364472118335, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6966914157873363, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.39174440233850644, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6762795187534849, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4526810222444627, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7303764654257315, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3809666991864665, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4393160369685383, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7326708250282779, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.16449149670902838, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5337097549575721, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.6349495142258627, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.7749613594649343, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.4719458927872361, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6863265729154345, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.5309354663044072, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6990707992725005, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.43385612637937937, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6552557413442657, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.4719458927872361, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6863265729154345, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.37973023491174585, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.727435179202121, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.4598036015897535, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6256401299595566, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.43385612637937937, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6552557413442657, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.32084466348045076, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.22063120635885589, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5852924591274146, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.31008822704072875, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.31008822704072875, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4506667273103674, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4506667273103674, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.31008822704072875, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3836374068673084, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4506667273103674, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4506667273103674, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4506667273103674, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4506667273103674, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.31802371065401513, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.31008822704072875, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.224188058954654, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5978847447208526, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.22894939325531252, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5747669845604989, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.27545321289806546, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6280000881172884, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.2680165156355779, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.545567244447617, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3086172473271798, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6217822674304354, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3495365897197661, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5973579837199989, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3086172473271798, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6217822674304354, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.36539169772085134, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3411488281065382, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6740035136770584, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.19920494035049138, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.614209720001149, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.7221847203387323, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.8931067231936596, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3416581331218724, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6578570934289981, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.92923260511913, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.23357697166633196, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6610479563844994, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.23374920560961487, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6381858968225665, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.38411167208361274, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7037084318891839, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.220294066346937, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6375628454216249, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.23357697166633196, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6610479563844994, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.6026286934891149, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.8385943306861641, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.13737279171076758, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.42785667387454995, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.17923344640485428, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5211683330085515, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7644556249154987, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7342525133793019, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4797543511401896, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7240781310560407, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7342525133793019, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7342525133793019, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.2677353447271197, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.569529411820844, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.20323131695812172, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5371005942781321, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5401725898595141, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7143127337179475, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5679161104357995, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7564733289707379, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.2567770437062668, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.38457089506267517, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6582292681072595, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.14107526427034148, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.6401876410870359, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.7526484951226097, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.5184341074271375, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.7295047041623038, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.4252502464011162, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6774296788457803, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.42643704825557327, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6730449758221991, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.5267476983756256, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.71821462156359, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.4252502464011162, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6803639512204375, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.42643704825557327, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6730449758221991, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.28648682864686603, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.543546241720005, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.20313747122261766, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5392632080295834, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.18623343474790552, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5348516130206653, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.24914989711092594, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33057129676705455, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5669225664686625, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33057129676705455, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5669225664686625, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33057129676705455, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5669225664686625, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3240220869485148, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5364140651922888, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.4301823405286034, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33057129676705455, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5669225664686625, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.4084622939366714, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.41786513699087335, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33032772118856274, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5637799127470854, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33057129676705455, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5669225664686625, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.4301823405286034, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.5468017145144113, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7519227909172003, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.5420890779002704, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7268331815757023, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4790714250659131, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7010793195917541, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.5420890779002704, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7268331815757023, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.617939643800199, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8356543644789964, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.617939643800199, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8356543644789964, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.6912804407652906, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8416888527493164, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.3386854985606571, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.604413581883028, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.7663313999772253, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.30042054271881197, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.27720938018510377, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.428047180290638, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.30350690419450826, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.2813985981593422, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.28107488868712643, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3334615788010355, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.2770051233854291, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.42513375642407447, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3050638713235347, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5082087402765254, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3395533581184405, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.30142704700265815, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.2773273497281852, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.29942074717273737, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.22847893469128855, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.6281881652405527, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7361567090943679, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.569133886912883, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6834516951654327, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.16807611261595506, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4597054186181326, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3343063479794574, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5429171669983389, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.2915369229944523, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.535395621261131, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.4504780990115136, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6386322492678208, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.4504780990115136, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6386322492678208, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1860962119549805, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.44847510774689797, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.623652672746999, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7064310568035931, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.21241088191397664, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.21511238963872098, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.48967538401421223, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.18951629567590744, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5515559648122452, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.27249745234058675, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.576487806400357, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.32078739729528816, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5779838399768712, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.32965129549221617, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5788023273137882, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.1860962119549805, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5438504570088443, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.19032892442937785, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5194565258434112, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.548958765126221, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.7425459638873632, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.18437427949667837, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5211412954589442, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.18951629567590744, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5114358081515511, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3365047447281543, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5791325287918098, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.14598608091257087, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.4458625802506543, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.1860962119549805, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5438504570088443, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.25811803218589047, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.4814564802258215, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.3742893656007335, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.7582803042224814, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.6316839256114659, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.8143078359179658, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.5069487414732323, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.8112065454752675, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.44543578807748957, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.7492834759166062, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.4814564802258215, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.7621649608882223, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.15573964185427053, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.3372949202573946, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.22894939325531252, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.6048598347770396, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.602867050301643, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.8176176657543648, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.5386695403411698, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.378882732439682, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6841096204411963, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.5738396574789242, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.798357133373606, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.2111187176080899, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6020583416224236, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6947687298202525, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.2534837513667069, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.17601203382268035, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.671938683171001, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.5206571060403834, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.689324258927, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.30344371233327844, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.6219235056961488, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.21555378801920327, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5577976700241679, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.21030548059060677, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.41421927364643524, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.6689624906287334, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.4063022828070774, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.6789996206024372, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.3237833370387541, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5986110578496675, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.3957399456352439, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.6505957913794083, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.11217219041746629, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.27571859863660825, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5218771218644234, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.4174441728660793, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6692136096184196, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3984098807009828, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.636016958488394, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3984098807009828, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.636016958488394, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3984098807009828, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.636016958488394, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3984098807009828, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.636016958488394, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.39811631946890474, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6320908834639722, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.4174441728660793, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6692136096184196, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.4174441728660793, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6706681340881337, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.4174441728660793, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6692136096184196, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.39811631946890474, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6320908834639722, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.17837875461384597, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3984098807009828, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.636016958488394, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.4794224895461657, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.17150296156301634, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.48812954881732445, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.46076979395163187, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.39000168645396877, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.35094536062899695, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.29898487912917937, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.2291182149355119, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.6293162592248092, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.22848056414159593, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.5921402782211889, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.21511238963872098, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.5217348733264977, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.1513630224364002, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.32937492594263224, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.18879521773374403, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.4618333673677675, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.29623686353922923, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.41682189465797687, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.6573099561830166, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.2852636439147137, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5851048071392815, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.17636478563502966, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5283932773245016, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4203546552244347, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.1196655750514248, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.29141398801197316, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.3143882918965084, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.6566540385253401, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.3088448141335011, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.7035462512447451, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.4186091892833126, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.6393114196475629, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.11856660123276004, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3311682798096144, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.3164257177669852, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5851860325042342, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.37494051432044967, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.661973437204244, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.37494051432044967, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.661973437204244, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.6358974376699329, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.736661937085844, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.4126152034907945, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.6941474239078328, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.5695988432761473, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7516103467926585, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.6120737901860179, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.8083636300305905, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.6452772832060505, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.8169530087932871, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.5965623111029279, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.4878595420976541, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7843954055342302, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.43600387912116445, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.753502627596917, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2091599003776314, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.6050299218248014, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.30407761511253945, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.14628563604185, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4777301300307737, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.29463458509790974, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.3659063107278196, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.2516441111691874, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5379762757309059, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.445107576642247, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.6959585094274452, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.19803162353826262, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4896673252212308, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.1719815974592925, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.48509337647058, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.21542121044605517, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.2770051233854291, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.45226514916414134, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.14221479650735855, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.31177258041697303, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4558951086991579, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.26035572673286655, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.40109985662775005, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.28460812517661593, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.40547044606076843, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.2966090320349725, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4554891527388646, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.21044444652079192, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.31177258041697303, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4558951086991579, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.3109048971841926, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.46948666843707054, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.31177258041697303, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4558951086991579, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.06180170963975448, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.2966090320349725, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4407593221936027, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.4912131536580228, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.19018868394774802, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.5224363928471276, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2989381657659374, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.22787958971339076, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.31471886527056153, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.3186225396765539, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.28648682864686603, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.5869807022024393, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.39047054966928285, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.35658220852248057, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.4755135386468395, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.22543269140466307, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.26195614303411313, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3052690053887312, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.37754323999245865, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.6551391601089249, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.007047108999241661, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.3143882918965084, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.6566540385253401, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.19075975291258387, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.46866709139162926, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7535887063318502, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.49779244057305255, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7564822254497499, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.445107576642247, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7263332833450973, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.3875407750115175, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.6320601493723194, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.4814564802258215, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7621649608882223, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.4441961115027302, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7565542718609186, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.4441961115027302, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7565542718609186, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.4441961115027302, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7565542718609186, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.022094354803669156, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.17001078098404226, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.5419100975160638, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.20731650338051813, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.30094298890378757, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5463695830483137, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.4529852871970908, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.6379815839992429, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.5192080836782018, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.7354576496586976, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.2476165058078653, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4758373883319851, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.30702571862234085, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5468678237231712, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.31620074377638474, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5547605030697765, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.3570583512587401, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.6254475447872198, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.3164257177669852, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5346911495697637, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.2680165156355778, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.49832559693883355, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.40505565245920605, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.4719458927872361, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.6884790828920573, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.6912804407652906, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.8449079689944796, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.47229389414007084, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.7400562860667964, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.6912804407652906, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.8449079689944796, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.44476089284108944, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.7117099802230009, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.464417424315424, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.6912804407652906, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.8449079689944796, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.4652992071811419, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.6912804407652906, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.8449079689944796, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.16024827804273534, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.5317530290531944, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.5401725898595141, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.717128056256897, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.5401725898595141, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.7219273458493682, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.44353395455270217, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.6913921626327173, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.445107576642247, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.6959585094274452, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.5401725898595141, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.7219273458493682, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.5401725898595141, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.717128056256897, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.5401725898595141, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.717128056256897, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.5401725898595141, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.717128056256897, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.5401725898595141, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.717128056256897, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.5420890779002704, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.7268331815757023, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.1754724247395998, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5107757383228504, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.5401725898595141, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.717128056256897, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.15821285888349254, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4716642229159947, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.39537383933343595, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.617311647158499, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4393606972268638, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.33359103227594633, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.600423959503607, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.1090009697802911, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4398327744078621, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.27330421266729565, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.2693466632631657, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5309206051118546, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.15821285888349254, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4716642229159947, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.44582080548137204, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.30335283306274363, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4283853203897149, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.19653306323688033, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.42643704825557327, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7385191646867102, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.3984098807009828, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6511785024442115, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.3984098807009828, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6511785024442115, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.3984098807009828, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6511785024442115, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.39811631946890474, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.647088606333153, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.44542488150142195, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.684375153574237, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.44542488150142195, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6856658569072438, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.39811631946890474, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.647088606333153, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7663313999772253, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.01886112664631915, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.4466679873664062, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.6799281948338153, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.31011575752288345, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.6452682411767686, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.5021277621795815, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.761461458169805, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.32393211943598493, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.6474115867020543, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.127408104603236, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.43993351395478764, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.09594785034023696, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.3865833291360058, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0810371533925042, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.31506451640481287, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.4126152034907945, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.6732486266096863, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.32393211943598493, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.6474115867020543, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.31011575752288345, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.5614133812306671, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.15462473462874404, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.1532685994792829, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.4662651599106109, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.1998573974138024, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.48166604565689325, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5155781222766946, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.14757581190431865, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.4573311375774372, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.15415064977510756, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.4576774423186101, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.4592064719908953, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.6719224520740146, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.309149936440332, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.1572663785778846, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5122325315328802, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5123550952856714, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.15415064977510756, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.426724812729464, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.3165967665056337, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.1504843536148922, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.45203030924244314, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.4391254859388873, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.7138345915744736, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.7663313999772253, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.28320384389628495, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.531318006400462, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.30643882011101126, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.44543578807748957, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.3951500216160541, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.6335042145699192, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.33359103227594633, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.708644913877036, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.480771131185851, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.7032048786770096, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.6358974376699329, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.736661937085844, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.17059573701616795, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.4753746252238087, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.40783219447079366, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.21951524426618454, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.4774650578315169, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.1868514164295723, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.5483968819141473, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.18759202316167214, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.4754599799412878, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.480771131185851, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.7032048786770096, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.208795826063924, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.5361160056750558, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.13817790393734294, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.368011314076858, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.22063120635885589, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.5781205353252427, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.18107197870881736, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.514661439036253, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.1813423031516851, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4972101263590737, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.18107197870881736, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.514661439036253, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.18107197870881736, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.514661439036253, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.1624355752882384, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4724990991697275, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.1860962119549805, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.49308679743240463, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.18107197870881736, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.514661439036253, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.18107197870881736, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.514661439036253, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.1712473044894657, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4635173016830622, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.1860962119549805, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.49308679743240463, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.1624355752882384, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4724990991697275, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.1860962119549805, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.49308679743240463, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.19835441454182887, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.6062730082124886, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.4074362040846933, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.7664523614495178, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.41443024325505773, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.6339071977529499, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.6912804407652906, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.8416888527493164, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.31941303791548753, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4444385005047057, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.4441961115027302, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.7565542718609186, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.23376909505556828, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.2281399713503153, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.6211104268881504, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.2797830107070484, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.4727805712999679, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.7717158158167359, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.4727805712999679, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.7717158158167359, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.6030730571413818, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.8475480354796681, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.29945160623183903, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5546772816797799, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.4814564802258215, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.7621649608882223, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.3423591961656694, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5502001052739403, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.15875722180934987, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.383354750306024, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.24769802565621082, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.6086092624563071, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.39670882908365773, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5348878791728369, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.27447938256311044, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5315032895817616, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.39670882908365773, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5409379877245147, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.39670882908365773, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5409379877245147, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.20124833529317487, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.44401287900537567, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.1719815974592925, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.36994072673675993, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.3426116434593994, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5133388823873302, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.17894177180728454, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.44133619978439725, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.16219748681741689, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.32210458788767854, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.16102642769112474, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.31256889728975074, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.3231203125477008, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5812275690118908, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.1906936342773436, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.5160021246888273, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.2534743707366162, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.6254912096804822, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.36291227725384023, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.36291227725384023, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.28812859193424567, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.4255747984644291, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.2595151369628945, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.6384214365516487, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.465921984618579, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.21576146358278564, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.5550645714484712, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.23600051863022123, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3782353749787568, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.33713540983351536, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.6250009083207365, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.5186653964016543, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.6561896817871797, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.40562163465277223, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.27779711191658313, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.34011486844537747, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.21668238955829155, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.5367532631666345, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.769322145613854, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.27338866536239, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.5914573885612058, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.2297523682812302, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.5487323556475315, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.1570855113100852, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.21294973841939238, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.2922968824016215, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.7829685247145245, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.3632703907932562, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.6409597524502569, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.7221847203387323, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.8931067231936596, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.44543578807748957, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.7513336773729535, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.319857965106966, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5393982413372412, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.33359103227594633, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.708644913877036, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.33573064840973227, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.7081054397334158, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.5263595737059831, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.33573064840973227, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.7081054397334158, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.39022736644855677, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.7405955935175441, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.2927057121559396, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6662552505924692, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.48740622698799413, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6794397309157819, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.39022736644855677, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.7405955935175441, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.39022736644855677, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.7405955935175441, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.3070898761263382, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6756152855124968, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.39022736644855677, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.7405955935175441, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.18759202316167214, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6184696220924114, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.18759202316167214, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.557445955724393, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.1375101316530452, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.5540102467708582, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.8012607361988002, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.1897992267368494, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4726855583591889, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.35559255894860375, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.3109048971841926, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.5254124510546129, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.1860639131207794, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.48181149445310956, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.7675828789334244, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.44543578807748957, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.7338978299765546, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.13352096115615372, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.4074362040846933, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.6464385241097694, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.7482524153102477, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.8447038922744422, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.5021277621795815, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6665605281744408, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.480771131185851, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.70066471582382, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.4922088386970059, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6106264390339488, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.32181275536083825, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.5021277621795815, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6982774997236794, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.7076534431960262, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.8413115375600476, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.4719458927872361, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6681602842119448, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.4922088386970059, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6106264390339488, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.42916222731145903, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.2567304004995466, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.5526271274789324, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.1821163528973126, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.1983544145418289, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4195908478809098, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.27970267298955453, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.1378592993183041, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3752233237961983, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.22147622285255003, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.1023857820560022, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.38421646372776175, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.254816209206472, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.5814444640902606, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.1969221590285716, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4139107793324548, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.1733705613469748, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3993932385978296, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.23927943403430146, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.2036972232991139, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.32937303862037204, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.3124684968073947, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.29948848396607075, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.29901594860271813, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.15885824292629303, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.2396544472075596, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.3711366792786969, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.25913517321015245, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.28697807819754534, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.28653249812917597, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.22607786658046147, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.2296291837985481, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.4341999352730602, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.6745907228091957, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.1986589078880532, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.5285168275193599, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.20110004903792847, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.45623478126637707, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.21555378801920327, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.47269072275515744, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.21063357946200129, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.4560703538905584, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.2281399713503153, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.5079006160677625, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.5300714512917181, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.7461630750708693, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3353598298584452, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.17265324947760644, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.5397778205094209, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.21518838690610018, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.5606804480411077, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.25026408934028455, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.4719458927872361, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.6534040262605951, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.1987777011513927, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.36857838224116973, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6856616009150279, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.3384653583738009, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6082869404281873, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.2476165058078653, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.47909493372494205, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.3384653583738009, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6082869404281873, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.3299895472527792, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6484644951902464, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.42612283570374254, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6716237521842675, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.44542488150142195, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6856658569072438, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.445107576642247, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6955301378913092, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.5420890779002704, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.7268331815757023, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.4536218833151678, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.19835441454182887, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6062730082124886, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7107240028283889, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.4727805712999679, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7717158158167359, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.44476089284108944, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6551098696198423, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.47207580389427084, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7434109160179552, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.44476089284108944, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6551098696198423, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.4464617303464354, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7384411540866627, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.44476089284108944, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6516368935552685, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.44542488150142195, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6856658569072438, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.47207580389427084, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7434109160179552, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.4464617303464354, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7384411540866627, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.4232354733407505, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7107240028283889, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5461499540157965, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.7954823723658209, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.44543578807748957, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.7334918117278213, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5465526716276092, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8012679276648627, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4183007445500922, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6544146882590995, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4183007445500922, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6544146882590995, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.41682189465797687, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.647688351711303, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4441961115027302, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7565542718609186, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.2869066874289222, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6076623179917158, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4183007445500922, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6544146882590995, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4183007445500922, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6544146882590995, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4186091892833126, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6584767887623714, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4183007445500922, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6544146882590995, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4183007445500922, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.6544146882590995, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4183007445500922, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.656180873465862, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.1817144072367102, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.5200176131748395, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.803154665668484, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.8805305626734038, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.6838626312597372, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.8481552379853444, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.803154665668484, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.8805305626734038, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.803154665668484, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.8805305626734038, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.6289868866690355, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.8095082593395664, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.6289868866690355, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.8095082593395664, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.803154665668484, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.8805305626734038, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.803154665668484, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.8805305626734038, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.803154665668484, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.8805305626734038, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.6378379852740232, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.803154665668484, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.8805305626734038, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.4462689092414285, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.20323131695812172, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.5370679638669973, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.46829007045350673, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.1818483989940587, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.4705600829216706, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.43690976318544794, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.31929652405610903, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.47876989915933515, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3917533437213125, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3642922752206821, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.1712473044894657, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.426932196025089, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.23546056552871467, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.398575696616437, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.42359095518407164, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.9063898435384111, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.23932595221309674, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4673115526141697, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.28613818387978673, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.5528031676146457, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.43481494774721463, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.22271767371139256, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.6553609623522636, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.8080381263652573, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.6313922341364886, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.8051546664747079, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.505242776482945, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.7265524593382774, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.2027445624852463, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.2514227030863834, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.5227131146872793, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.2046592065585361, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.5139378364418256, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.39020358281213624, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.40763281626399495, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.36781707614204445, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.36583359636400986, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.5261112461035825, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.4931587502890964, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3421359311004187, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.5068657796437095, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.26700459848070734, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.45614973690855576, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.2661828424443392, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.165838472529457, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.38908651109487247, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3337972903996398, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.26307356948037885, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.2079701729789175, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.2377025655787593, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.42203897403177737, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.28758906080679814, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.29881258170574665, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3059460816807008, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.17717634270740748, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.25498213295426564, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.17411036809769512, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5409636216635109, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.4374960951307028, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.6840706293465405, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.16679551613797314, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.4850274766865928, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.1943759862788499, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5338394442325974, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.1392908359945467, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.48531978068695414, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.11856660123276004, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.40773148598102293, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.6798017979514573, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.14411291670643006, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5000399749325595, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.16559113761114783, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5464808970807227, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.17411036809769512, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5409636216635109, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.37643606776410926, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.16559113761114783, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5030743954553002, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.3025029865727436, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5564009706295315, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.4651954337860559, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3764940106481337, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.2797412354706287, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5217401427389217, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.4763990880413316, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.28302740134070886, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.3014335251508215, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5726800490411352, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3631697646395501, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.47708987783257517, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.2725758492393828, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3923311316125708, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.4021117013686505, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.4152914707667959, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.36466819017308727, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.37043553303426646, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.41850774247348516, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.40740178389631576, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3707717111254176, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.47519149773042846, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3658006211440879, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.37421553597876317, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.22564262486685283, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.36926438076616647, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.15942178318590763, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.36039626112317097, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.5942499629418814, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3078802898940204, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3865320677199308, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.24505106440667512, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.1924244680058936, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.7773819133344605, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3710784497353679, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.24192619393259787, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.4921318417839362, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.12478083711714635, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.2863079147361709, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.3292010361291119, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.6484221669130951, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.4246163317880344, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.6675494539138593, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.278093559995945, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5759531667584591, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.7009885119411133, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.31573558123189943, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5918125947853188, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.3083012995502152, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.6188233920257146, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.7009885119411133, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.4466679873664062, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.6702025696488597, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.45307778036928104, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.6755119791745777, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.47207580389427084, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.6992480502085702, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3191766011456815, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.13566979610140004, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.46773464768769135, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2869208283752505, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.3243192696860874, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2229529832462866, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.28619462359984627, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.4274420047831983, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.3601065525200447, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.4045961455348396, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.35271270311585035, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.33071231815127045, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.3225479310829689, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2509956074597684, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.35813948389425215, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3263040636562357, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5838790966762375, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3390387389794623, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6170420596680538, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3142665434344143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6466526067220029, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3751840463233443, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6279894552667558, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.551397074868541, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17470942957770763, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5403400891349619, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.37392149096896676, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6665214662145853, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5763410052067085, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5460240376042262, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5838790966762375, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.24343304284910333, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6275577931282961, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.28571962561926445, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6431872581462166, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.4216890913810254, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6885217194158456, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5014756677893482, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7958858211784339, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6255340042200862, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.8724783049357475, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5014756677893482, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7958858211784339, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3083012995502152, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6589376390020449, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3083012995502152, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6589376390020449, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5014756677893482, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7958858211784339, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.29176300840900793, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6143650111703199, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.43021236941942204, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7142896582178452, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5014756677893482, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7958858211784339, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.24090844358935917, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5468852870478801, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6255340042200862, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.8724783049357475, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6744253146961531, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.39545121937832856, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6963801389253689, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6991726442472661, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4101479464529936, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7041976254287654, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4547900039222725, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6541971428810075, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.2919394073770869, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5957961314949175, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.17537670874647399, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4800889669735933, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.22845493240080628, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.584996891148118, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.2357664506880305, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6409280879253807, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6991726442472661, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.23272696712467975, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5794868721814046, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.38785611216800814, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6673259967761724, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.16331948281960493, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.350650198151987, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.8056920633274978, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.8391519966182309, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.8020845125558708, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.595092211343687, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7971172820981081, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7122562458056777, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7122562458056777, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4207937380724192, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6985308026285912, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7369844404912368, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7122562458056777, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5700185304500285, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.595092211343687, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7945212279546889, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.43011383006801057, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7140577175386648, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3843363395779093, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7369844404912368, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6824395076981005, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7719180936906627, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6590438071804039, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6824395076981005, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.4481489512240194, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7745649676018984, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.38305978177479755, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6061131723054572, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.34636800712900173, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5167955767158704, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5397693417183738, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.425143650778693, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6674242019044293, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.421151249507493, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6938674571170766, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.41843795218458035, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6316283876832989, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.4803501444747088, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.7417101158248365, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.42221847853238736, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6656008733100179, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.5124776602965491, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7722874800637285, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.4481489512240194, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7994721822064033, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.38754077501151757, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.598503332887995, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3291598889023262, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6085546680624175, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.6173766800527999, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.857390040146912, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.6173766800527999, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.857390040146912, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.6173766800527999, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.857390040146912, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3470839302425112, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.4845766087853281, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7138566289355139, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5512324461754572, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7289444696770301, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.3737098172408067, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6832201170000932, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.21338748895376336, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6034116935803774, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.22436571657855092, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.61166969974579, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.22423870508323301, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6366515193698862, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5582775802710993, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.6507561416639396, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8215788698315908, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.6507561416639396, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8215788698315908, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.251696695878184, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6180491939580447, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5967384019266717, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8544348080833218, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.29170205300854224, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6498499527552988, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.2719326877457978, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6002086362682414, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7246227738353674, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.29170205300854224, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6498499527552988, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7246227738353674, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.28592291256793106, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6102727682426059, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.2774290545068997, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6397454944654261, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.2748202507307579, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5810363959809548, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.28571962561926445, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.693456244639743, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7121135616759211, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7395804946242599, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3684981984538114, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5606332518476288, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3694816688798906, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.1423071532720465, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5234276250101042, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.8020845125558708, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7369844404912368, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7395804946242599, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.38036178325786096, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.20826058354833846, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5799650985757929, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.294467310498826, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4554141323944355, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7121135616759211, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3201911827891037, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7182383858693244, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4536404448264584, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8020827133708689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5134477225657772, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.22831386795944372, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6930977635889574, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.25755472674357427, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7079787462750899, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6689250750617529, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.25755472674357427, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7217376192850543, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3142765374520343, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3060368950930089, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7004749900624669, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.8084123599808738, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.4545091839935173, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.7166050399790445, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2919394073770869, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6265777781732258, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.4345673759957651, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.703388118507387, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.43485418354574973, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.44797220217437844, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.24939081998882368, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.4812700337596407, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.7668482135865776, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3370129264673147, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.7096874943799061, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.31771674795486515, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6550628376568252, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.12648351910430983, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.19910401453355991, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5815343547138478, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1624355752882384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4952968469712617, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.45307778036928104, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6384504056254413, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4201902477742268, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4741401979744739, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.191072229574376, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5901487703215178, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3800213082631731, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5676463425230758, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.44401360557563874, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.27587476896182844, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5801799655962208, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.2543881726648529, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3099293756712212, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3766019021279213, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7318674193893624, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.37489047453628294, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7155230965848066, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7807505267551733, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2697856975860103, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5736298373015629, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3766019021279213, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7318674193893624, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.20390514683548702, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6747066998707847, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.8131513745396886, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4909136024426773, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.8295116386418164, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5920266866634685, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3766019021279213, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7318674193893624, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5084550790849273, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.1832567180568652, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5727346150299959, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7246227738353674, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5595205105615875, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.8322210048001876, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7246227738353674, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7246227738353674, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.28592291256793106, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6102727682426059, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3171094709345114, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7045234516083255, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3060368950930089, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6834837188844622, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3942058093215873, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6697898834930974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3142665434344143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6466526067220029, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6934309279690296, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.2453392175275486, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6569130291153491, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.20313747122261766, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5506087730896332, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.7815961723922495, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3763693611344683, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6360504215730572, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5760406199498378, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5760406199498378, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.14728212724124629, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.485741585706456, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1973212456326944, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.4151043049244464, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3610544299180199, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.49125115898082056, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3610544299180199, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.49125115898082056, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3733543476417276, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.538395940979961, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3610544299180199, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.49125115898082056, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.20763578034718042, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.46035934390642647, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3733543476417276, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.538395940979961, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3610544299180199, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.49125115898082056, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3733543476417276, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.538395940979961, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3610544299180199, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.49125115898082056, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33891487511850005, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5365882254723207, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3733543476417276, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.538395940979961, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5848344753614038, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.6734648419604768, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.7694606959147566, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8578928092681435, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9422733087334002, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8578928092681435, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9422733087334002, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.4929664394953523, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.6587225864765196, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5955978088638718, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8632174102523461, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.23487811400114963, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.35937816565888026, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.797323390576564, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5110381669871915, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.6750223515189266, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.42818224355402373, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.667901678840575, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.2453392175275486, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5365332655663203, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.48707827505552054, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5110381669871915, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5110381669871915, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5382940226742914, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5110381669871915, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3677323079275383, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.22738612304909625, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.47454858661827737, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.4803501444747088, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7417101158248365, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.38687573986922297, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6514359547109982, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.42105372680687736, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7001171094008295, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.19910401453355991, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5338904589112099, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.24233572351352062, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5675102323575353, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4848137281002213, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.38785611216800814, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6213964982068823, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.38785611216800814, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6213964982068823, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.33425592140853283, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.8110307349404526, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.2767906930665974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.6946453530067933, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3865584077322271, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.7076640192892537, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.19547215688069816, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5978847581113598, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.5183282721440023, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.824367835388174, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.7121135616759211, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.7395804946242599, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.527528099078667, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4094709585736592, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.47384807927636907, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5479565964904024, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.7369844404912368, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.6532234058412462, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.6532234058412462, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.3567823943323416, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.21576146358278564, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5945881910966203, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6266330371317139, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.7076271819674439, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.7128603669502883, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.3584668928097086, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.634863098567942, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.3584668928097086, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.634863098567942, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.7128603669502883, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.42221847853238736, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6896985035484708, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.7076271819674439, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.1939280560840041, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.46312151331492984, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.7076271819674439, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.19415472735264994, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.41508997974031253, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.6266330371317139, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.20390514683548702, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.6152907875442002, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.42062888241722096, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.6813469636986809, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.2600960555023324, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.19898107345153532, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5339708887761974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.4770332228554784, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3490251488234659, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.39545121937832856, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.6822216627082669, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.20313747122261766, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5634928669626099, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.17389434573554247, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.24233572351352062, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5561045459288251, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3942058093215873, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6294033705157869, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3942058093215873, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6452705345581219, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.1712473044894657, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.5272789142558241, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.2111187176080899, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.5086920944994741, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.15929050399664219, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.5414849269145706, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.4909101855057947, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.39545121937832856, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.6541357656856408, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.43281826407421803, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.4282924873829561, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.4056299814865685, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.20028107620075963, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.334851704167788, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.6474532635641537, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.28615556452106294, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.6299182889624744, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.31118041688292913, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.33464494273746426, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.5983196805551743, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.18961526642588783, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.334851704167788, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.6474532635641537, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.1974694070034893, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.4787974949414673, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.713332477096005, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.32952451615212436, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4924875778629721, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4021713045548922, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.1842384650009126, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.20313747122261766, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4583712036944982, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.12962472880491877, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3044799424809889, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.39936742298908956, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.1641956652179752, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.2615311775021803, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5508394512324739, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.6782734900436637, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.5758647546570652, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.6576054208318073, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.4536404448264584, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.8020827133708689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.24001896226067918, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5944702899865559, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.17979969665124504, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4918511602341556, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4837853350093983, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.41368954504257266, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.6422762292356853, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.3836841681691306, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.7127478995829692, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.20479056612936936, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5251515188723699, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.24237768532177115, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.39469536234609737, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.5917048915180981, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.39469536234609737, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.5917048915180981, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.15604242268653643, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4751382685885456, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.230440974470398, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.4262626090774457, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.7058556376289643, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.31446071400663894, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.5931614744771728, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.1278241696265761, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.19319794288373768, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.45771966440001516, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.15824382329465247, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.4020801848996587, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.1835554260049945, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.4427324890847145, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2491316630275714, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2148547638367739, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.26506234837226944, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.19998549292703938, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.22686182598679874, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.5340286051317624, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.28799583290763703, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.24220427601736638, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.27250051496110134, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.21813681724512826, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.24062089463790082, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.20312835120509382, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4762668365393059, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.3407563025626974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.6104226554223803, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.6407363191582277, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.6966460917682386, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.6407363191582277, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.6966460917682386, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.6541971428810075, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3937848105507625, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.36684984164094486, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.6366318617371836, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.16692486522015718, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4023647697112747, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.57359744419911, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.25270455578796175, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.571873510015388, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.33383285644152466, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.20312835120509382, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4762668365393059, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.2719326877457978, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.5963825614997932, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.5595205105615875, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.8322210048001876, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.5595205105615875, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.8322210048001876, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7246227738353674, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7211812032548905, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.4812700337596407, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7668482135865776, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.7211812032548905, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.39898493411026575, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.6660257584377366, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.2719326877457978, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.5963825614997932, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.2731306427308864, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.2719326877457978, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.5963825614997932, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4770529960418919, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.49975293173596386, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.15083364266523736, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4907822977105627, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.15821285888349262, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.49028210447768544, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.14728212724124629, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.48638762628235294, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.45056319355400093, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.28597887157586055, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.6270330226583704, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.2415725261015974, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5949018835911474, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.15821285888349262, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.49028210447768544, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4408750259635687, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4180191500256661, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.2668173065178967, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.5335299694016906, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.7980971476599384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.8084123599808738, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.35818640176176625, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.723627810424739, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.8084123599808738, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3955812506211637, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.4745180734945151, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2765896733581188, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5826805982089127, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.43483587481573205, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.6723935384652386, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.27080524311589804, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5735629822442805, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.27080524311589804, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5735629822442805, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2765896733581188, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5826805982089127, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2765896733581188, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5826805982089127, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2765896733581188, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5826805982089127, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2765896733581188, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5826805982089127, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2765896733581188, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5826805982089127, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2765896733581188, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5826805982089127, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2765896733581188, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5826805982089127, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2765896733581188, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5826805982089127, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.21576146358278564, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5673560872668851, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.39545121937832856, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.636466558635705, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.21576146358278564, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5673560872668851, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.1842490992269057, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5113499757807896, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.2165768464503216, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5757840553675324, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.2912014808653287, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.6424610716762174, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.2165768464503216, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5757840553675324, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.39469536234609737, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.6657467951920233, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.2046592065585361, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5887122703216473, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.21576146358278564, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5673560872668851, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.34993707212869785, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.2165768464503216, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5757840553675324, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.2037792411904348, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.5548802330642336, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.7005713730032203, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.11385032360134208, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.4382795902467684, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.1365189729052536, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.4259340541380412, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2815871636550668, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.3806583469567467, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2691593314181093, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2742389123790289, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.44777955633393424, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.36659971468949054, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.3305228230404804, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.3993365662301727, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2941979168579534, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.16299446731288944, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.43649143020176306, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.2250861242438523, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5618434465935181, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.3238579233802238, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.6198368821194998, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.512336215207795, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.49345671324082974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.20312835120509382, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5573572048061965, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.314655516390602, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.3365822615578528, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.6591579540156445, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.20300292520931204, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5545072586157459, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.3365822615578528, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.6591579540156445, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.32570267192540586, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.19469940719627615, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.5244232343746598, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.4481489512240194, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.8131857452490882, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.5124776602965491, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.6265447017943011, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.6730489965212471, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.7670434817254471, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.6730489965212471, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.7670434817254471, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.28592291256793106, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.5686492116636237, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.2912014808653287, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.7275929939966964, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.4481489512240194, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.8131857452490882, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.4481489512240194, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.8131857452490882, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.4481489512240194, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.8131857452490882, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.19511368322427836, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.5589893625764298, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.22714355926020957, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.5327568967360922, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.6807294776537712, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.39469536234609737, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.6937261271262425, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.60585154759089, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.7547619819808454, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.4262626090774457, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.7343467434735558, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.42902664419909115, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.7443887915363598, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.4020760403449254, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.7016962551122522, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.7369844404912368, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.3610544299180199, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5342348049013494, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.5424938760789326, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.8020816078177312, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.6507561416639396, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.749948047540145, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.33042158593448145, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.4375968762068432, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.6590522929608883, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.4812700337596407, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.6942705518980387, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.42195777059677314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.6687475942312653, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.5989728676603553, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.48156738796358634, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.707316874318671, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.23972125922151485, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.5755240213917002, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.4812700337596407, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.7668482135865776, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2396991920464788, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.48156738796358634, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.7157788903059378, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.25522917707099674, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.42221847853238736, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.6815801937310393, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7335705336375569, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.29715678881302643, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.661467129406907, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.29715678881302643, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.6509319807414574, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.29715678881302643, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.661467129406907, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.30752616970214336, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.6106236483676958, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.4464617303464354, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.7099628979634083, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.3843832649911012, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.6360002062017179, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5575264207911254, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.38615059096335336, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.6261010061605436, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.33965884450200445, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.582814803428267, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.22714355926020957, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.6149327491870693, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.5890498835235906, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.8321239689935634, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.6647046501418657, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.5577957421679061, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.6972259762695181, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.1917460913619136, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.42437073033751493, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.31912296554499103, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5467829654685376, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.42221847853238736, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.7096175474139502, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.2033897418920923, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.551556930942916, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.42062888241722096, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.6825498124526633, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.611788827244731, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.20313747122261766, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5346701852529732, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.21576146358278564, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5587530087616077, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.42221847853238736, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.6896985035484708, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.36800882629132287, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.6460868517969176, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.42221847853238736, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.7096175474139502, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.23707730131910096, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.2706380285588004, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5602052818707742, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.31771674795486515, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.6823632455739186, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.5397323593778651, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.8110662878512482, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.42221847853238736, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.6656008733100179, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.2111187176080899, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.5647907462051993, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4546795690250899, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.42221847853238736, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.6656008733100179, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.5397323593778651, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.8110662878512482, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.5397323593778651, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.8110662878512482, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3884085226314684, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.48156738796358634, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.7671994551643374, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.38918346804460413, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.21644311639014951, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.4550086560720594, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.33188010562448456, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.307502324647974, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3068845541987739, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.29264275195494416, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.24001896226067918, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.6014499104482237, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.2384726227721658, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.4820178233978107, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.44981557841577613, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.23509223658357026, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.14962848372546667, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.31921457459318575, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.2919394073770869, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.6265777781732258, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.40982224146042756, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.27080524311589804, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.45763886314510427, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.3521214014864166, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.29308590601052215, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.42902664419909115, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.7089330062523613, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.3086883400264, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.5960059844153068, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.23794506474388488, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.42902664419909115, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.7089330062523613, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2567626980454705, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.2998634479378894, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.6247826968095733, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.48156738796358634, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.7671994551643374, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.7590994812356263, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.857390040146912, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.7590994812356263, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.857390040146912, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.8020827133708689, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.8020827133708689, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.7369844404912368, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.3763693611344683, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.681475411202769, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3884153333348233, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.43011383006801057, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.7441960090869769, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.3113612721440885, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6244179228679348, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.19857943409196785, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4841638348150365, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.24456656109396324, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.502026173233975, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.2573956940045279, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5660567243461767, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.20229280648000492, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5193630415443222, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.3113612721440885, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6244179228679348, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.7246227738353674, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.3113612721440885, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6244179228679348, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.3113612721440885, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6244179228679348, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.3113612721440885, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.6244179228679348, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.208795826063924, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.47509890161874874, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.21992062963866632, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5471220923935656, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.1427404270947385, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.3766019021279213, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.6426846682861654, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3767656346408826, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.37693555882757257, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.35816242771443213, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.22060794501194753, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.334851704167788, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.6009673735564677, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.27080524311589804, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.5251724178189929, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3975448812222411, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.2174536498549041, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4945481209434918, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.33807764768133375, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.71426422535372, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.44701617851855957, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.8047183456557263, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.421151249507493, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.7602108728496834, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.24062718841066488, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6778014913685915, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.20617350508583818, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6739851297272836, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.24467340606531432, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.31671615012203974, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6560671328641873, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.2917591430729611, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6844204996787111, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.3793086863337399, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.30041915229862387, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.6550573187445743, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.6507561416639396, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.7392285437932827, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.3062859135460401, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.6540898825644205, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.6507561416639396, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.7392285437932827, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.6173766800527999, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.7301794230871377, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.484611284323379, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.48936469277309125, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.3763693611344683, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.47084569901172335, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3429043870200186, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.20207938879963666, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.5275433362317532, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.2965071539728828, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.5085503390295181, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.7251727471866002, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.4356925719771587, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.16246736614250729, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.38605159790728016, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.3026944877822123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.38390149148943287, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.4362508313532012, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.2600488816870883, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.17829987290849303, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.4191501080003414, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.3934356665260354, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.031442147565579066, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.25772455902514985, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.1969221590285716, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.5206656710605527, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.4079052344385883, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.3439550611757983, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.8084123599808738, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.30752616970214336, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.662093020699087, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.29170205300854224, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.5600853382301801, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.5332286348751792, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3308959815150696, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.8084123599808738, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.31327681146619374, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.6404873704225963, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.36684984164094486, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.6276674727087102, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.27105363860597637, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.43406631668987594, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.511075227027215, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5741842828404965, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5335784441425054, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5335784441425054, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.24062718841066488, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.6139570750776484, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.511075227027215, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5741842828404965, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.511075227027215, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.47980800108851346, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.511075227027215, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.452106591437223, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5741842828404965, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6529241277890402, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7410529316463808, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.4896430866960958, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7638521785649908, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6529241277890402, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6503678865722725, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.38513414673376833, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7120077407246694, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6934914549971836, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6529241277890402, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6934914549971836, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.31771674795486515, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6036038206046929, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6529241277890402, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.73719964992947, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.4797543511401896, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.7030838074817461, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.4797543511401896, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.7053481527490161, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.73719964992947, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.73719964992947, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5967384019266717, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8544348080833218, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.73719964992947, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.46298522813477694, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.6897393951285803, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.5967384019266717, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.8544348080833218, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3485799122645514, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5870764478159658, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.4328015276270854, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.7015297445241917, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7199991365237522, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7246227738353674, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.3084977337313932, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.615980419333811, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.435949382480739, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7673284019128814, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.702426996306173, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.17855149299161596, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5203115480779714, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.17855149299161596, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5366596515222662, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1832567180568652, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.46874652173038095, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.17855149299161596, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5203115480779714, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1832567180568652, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.46874652173038095, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.17855149299161596, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5203115480779714, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1832567180568652, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.46874652173038095, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1969221590285716, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.5276344273763174, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.18728674627858763, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.49857067709692, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1832567180568652, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.46874652173038095, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.14962848372546667, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.425496866339571, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1832567180568652, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.46874652173038095, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.5019033159973346, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3882810705699302, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3200586334957503, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2926675483598696, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2940727992972817, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.26051063874884706, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.3584077083565857, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.7394348668357312, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.28531713096063266, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2904734822892112, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3469947595749004, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2461344639192595, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2655620124722497, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.17020807300741128, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.3763693611344683, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.6360504215730572, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4909326710993637, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.32338428706911604, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.5840503541053488, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.2742556870386487, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.5231431994520171, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.24969367482838334, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.334851704167788, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.7234023926557539, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.21690365808279138, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.5840809989792347, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4888708932434488, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.2579180303636169, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.1414885045412184, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.20760470031302655, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.42791815571433417, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.413948387915005, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.6536628131390233, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.5391491945473402, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.703591887429203, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3166072542829537, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.14962848372546667, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.320407667005801, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2564868977542172, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.5954598909380219, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.8010685131009633, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.3479731564184223, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.6172522642259175, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2719943818446656, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.32282138800401855, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.5801749060979678, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.27486480972020183, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.4670509248796425, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.17202650214787163, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3503510714510492, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.39058393006987374, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.16261055653267345, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3574935801968696, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.31315988574922216, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.2220992502530224, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.4000805406381997, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.37568878636171427, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.42123893181020194, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.260080077047301, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.32481036250266265, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.346072731154532, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.2932383433617197, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.1317331393528801, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.34289957530696186, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.16405809898807555, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.21644311639014951, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5575527454538532, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.24248545140243574, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5768340234336301, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3793309425596856, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.2037792411904348, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.49504238714090304, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.21644311639014951, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5484899089483192, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.2046592065585361, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.544128595708324, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.2165768464503216, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5556668977066362, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.20390514683548702, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.4993104339049491, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.2589731280621761, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.18413533063377066, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.4955509874287835, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.39569555015790975, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.6841500930430788, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.45653838513939016, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5463837424085701, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.413948387915005, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5436043789950441, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.47323525740834854, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.45408876670111487, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3503570926151391, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.28592291256793106, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5235047096821839, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.42221847853238736, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.6789177867237879, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.27413159457082675, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.28592291256793106, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5210087973470136, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.8084123599808738, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2076047003130265, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.5791447789263454, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2821801681960571, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.5343486909870273, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2823545141004295, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.4992160408903782, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.42781484820807203, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3240069994352789, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.8084123599808738, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.3675667565747676, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.5351783489396891, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.30238584075547453, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.5923011903684523, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.32280531478977453, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2719326877457978, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.5105359942982793, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.19408883848117267, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.38305978177479755, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.6457837185727413, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.2764205123105664, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.2905442260587855, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3044316105248322, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.1693262946654562, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.3113002029497926, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.5780604477077254, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.29532213400892765, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.44922962827364366, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.14173543163061522, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.24993921017596432, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5109316705796892, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5109316705796892, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5109316705796892, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5109316705796892, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.48106412052016373, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5109316705796892, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5109316705796892, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5351693240792145, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5109316705796892, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5109316705796892, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4853530227174386, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5109316705796892, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.5370788574666518, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.45798136636926595, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.18814785746917081, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.5307880463310148, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.5199261214674054, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2583320817896072, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.36710489645199834, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.5336198282523079, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.5049142010220853, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.5075366635951049, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.5156030482282894, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.423135312287944, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.5388253098847887, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1582866049832572, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.34487142413575794, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17905278399134197, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.37257295447029826, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15521606028436608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.37645329404497957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12620429887108936, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.35580703793872603, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12872220631084524, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.33602633953270183, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.03037224815656603, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3045613775157565, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5275070803493389, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.10203846572325131, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.33381153680096753, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1685643537060726, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36926449644166065, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.014935758919429663, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.08106107745254391, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.044304867337633724, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20806974344498103, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.08860973467526746, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3178004360288637, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1418524086391329, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.38295770773758747, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.15268019045355535, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41028757620299977, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.030860166165309233, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1100250143829584, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21255327712152144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.43272151570555034, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.05918530850500025, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.26064517697298795, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5092206110218525, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1438459189500836, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30693371625402605, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0979038733644086, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30211704738953993, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2288990188897003, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.48933901443699584, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.009624974244068071, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.07318255686027669, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.043420474648595074, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2884095690753619, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.19074380068002203, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.40566585096277824, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.17382347640129553, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.4061580777885601, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.13868172938464635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3094469764260441, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.10361854845420869, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.32774802711076473, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.15186969315425305, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3458120002305796, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.20031726728306523, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.24015860380810322, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.432284860519166, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.20485833586704885, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.468735805943922, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.192481383169461, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3799051443349615, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.01252735726099625, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.273148644463442, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.22381487678101888, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5249370100068887, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.09431297723472011, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3616856339096348, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.11091252683001185, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.26607634610445896, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.189717083187238, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.10266747466754884, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3364703638684802, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.28912109037408523, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.2586476022577052, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5379610020033071, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.2144604484498437, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.48894052224175993, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.21001173689943997, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.423493931076046, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.006232910970143225, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.06317168666869727, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.06938388878349923, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3541078046399395, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.10666682719585797, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.33462901494141756, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.14557808399334188, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.36598346755702993, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.18154954789336694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4557483776072868, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.06897533888461813, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2776666563000344, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.1665765483402476, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4017968725013381, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.04151505758906764, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.19356733603515675, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.39927676303776216, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.12189363728567917, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.37595660827287636, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.18926971577178767, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4931453714148122, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.004663531624960091, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.07262533604330305, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.037401300306846526, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.27395881217705964, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.20198948917565754, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.34858221035657466, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.16780109158842918, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3968694014697679, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1381751568911733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3121557499162649, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1579497466001673, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5092928545844059, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.16286876096900815, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3422914837190449, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.06888992790640074, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.2874483621307283, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.2552422097218187, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.42162943894149857, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1631196072688366, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3502730667074754, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.21286836557101563, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.45055232014427626, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.031126201157905466, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.015970144454664378, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.06929847827527827, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.30185194035792856, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.22669486951066523, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4484451941575473, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.11697642623186386, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.37117753637984835, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.20065115069964384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4084885616013531, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.17621963873521423, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.09916146090364127, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.3121110160693956, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.011560595536104562, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.25846370764999194, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4472520901382737, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.07368089078790738, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.41452613113710224, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.17892846390928677, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.47088195615067674, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.012370537823050053, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.06660321132654005, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.09831093939330879, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.33203866499974327, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.09916009482330297, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3032928217006101, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.13805615693046389, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.40787998733941394, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.12291219097556666, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3448002180666873, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.17643078314788999, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.40757584786696294, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.1989414239237112, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3791567776918788, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.09453698369211004, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.17848407049966333, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.37731466186079826, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.1995980198896431, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4244503391142409, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.18216362398065106, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.36524832602306334, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.08319287955437346, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.08383676689911676, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.2855329690010324, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.09478705591775652, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.33293232395887284, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.19148282873929853, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.4707949702068854, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.20608572305725564, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.4704943905570542, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.08183353655679478, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.25007633393249695, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0981642545874085, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.31793222329793575, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.11976209355757551, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.27004759126600675, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.18169212046427471, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.40815460354304234, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.09142555538569784, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.31371707771405133, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.05438497632520132, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.26123506271154656, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1490232164900303, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.43745835724045856, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.005606294971348417, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.06662245090541388, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.12752236829255797, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.14057105892389254, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3028381427383384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.12157241570357182, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4080990097991491, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.085416483900781, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.2825804066750608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.10415298161056984, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.33452632923050557, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.21204239268527586, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3846197304420823, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.004763623056487517, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.07485928007606017, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.25599133172724897, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.47354758587475243, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.11689600237805012, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.38258301195690664, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.19809535837880818, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.447539350421338, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.004718557257042585, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.08019304349523304, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.05614653993259943, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.26485323792360876, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.13635319583999642, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.2850432830231861, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.16431887969160053, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4088971379214799, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11452508920842025, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3212742401272785, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.08218359452575877, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.23905391762860753, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.17673835621668263, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3902085179927465, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.051272222858601425, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.21925629669878902, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.26224701521976646, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.45901439168213753, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.20362195873137665, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4504603915919526, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.20927351091825444, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.41232284529686536, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.006488743008712295, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.056679733231823716, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.04209313835422283, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.26913406771501547, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.06070088845782673, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.2584364364927186, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15184278721506198, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4093399937921707, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15478222669012726, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3550584759508654, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.06244445123318812, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.28239834932587327, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15685632649880807, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.34378295878971765, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.06467646497347093, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.2374647159547877, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.25564222289599997, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4291463251432027, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.05675489168243481, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3098329822024127, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.14459834065375157, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4652483976219767, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.056417721736162135, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.14707146406788849, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.050577564370191244, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.26455598459911367, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.1054433514098504, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2840946641780818, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.16758563722627876, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4598125962895632, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.07875433150726119, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2638954513805452, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.12334630141873701, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.3570869171580578, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.19153195331287226, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4035796398628449, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.005649824351905227, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.09384599631616997, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2535787381720089, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.45789666197043016, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.1738582449442553, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.40161714405254456, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.16120676251405475, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.3934823211441987, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.006102253115653432, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.08614490649176082, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.05052791122570277, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.25244788085139286, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.12768613576122964, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3279857505284436, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.18041700926694673, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.43852448917973136, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.10734088848154077, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.33946796348247366, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.12499287263993265, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3031531068573407, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.14318317227039934, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.356756117753337, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.05915285533036862, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.24879847318007425, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4808459302114646, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.15720527174368754, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4715103005986015, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.1853793533058344, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.47839321418703307, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.21349841283886073, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.27204846616025496, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.16136987880724096, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.33626920748765377, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.15197436941722972, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.37271000364127155, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.17795920517030017, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.41862955401967455, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.17060644184287996, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.4054584763100862, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.17670199390439656, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.36682227371085463, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.05468777721214362, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.2495519218392036, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.2706589648942988, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.45763107813926884, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.2195372587354865, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3664303672465512, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.17524367912943578, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3908643084796051, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.018807992767181335, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.0887797545718027, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.06437840881729344, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.26576141148273813, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.13230039635238258, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3269392904147474, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.20266988583156875, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.48179719155586864, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.19388048412249795, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.44361702376789247, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.051668546856871944, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.30087455074312014, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.07523788658275522, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.2771251147123664, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.11588911231177465, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.23827009172444413, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.21341277372071984, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.399326719551766, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.14947301772770566, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.30933819017694797, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.30287590293584354, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5604725285592409, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.004712897582765101, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.06896759630799948, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.05079616735013072, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.31593754046223704, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.17544176680792672, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3246583081139427, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.17337747588904887, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3602154895924569, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1237012344369667, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.33331866832253354, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.13636398180563136, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.28987182748753165, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.11209730709870733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.31364540519664647, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.08369831431112969, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.2040584431589136, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3869788846445458, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1955903221252232, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.40982133788576824, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.15417968758527056, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.37529479808790556, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.04130329986722028, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.041010356073949844, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.2740873282030685, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.09199306870423013, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.22163854171424513, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.20923298022634812, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4391664941823773, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.15589802574348086, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.37894206802233305, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.15697021945336284, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3714913924449907, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.10748431441036872, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3292908634814674, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.005478181714811864, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.10269142826924012, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.22493822179966638, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4356073090431114, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.12201642745653093, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.36024578318571476, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.1107058214411635, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3732245061642063, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.039842290129399376, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.0904612219823137, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.1049798504546962, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.35536862718638546, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.06089987261870556, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.2933161562815446, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.13303798096767047, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3647236067340644, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1948502778967486, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.35525815981538433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.12370396553485627, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.36583052207842287, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2119411486498165, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.43506560274344996, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.08430389952515091, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.24884644138656709, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4890540554955454, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2024418414576267, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.356433349373201, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.26748241941426637, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4950459974606264, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.010814656004254549, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.0896887156447935, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.2679540690980116, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.14391826157279944, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3239832814361818, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1769688060281599, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.43047038034793145, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1618333627385132, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3458746996740858, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.17740552204949464, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.41741195239753426, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1442495030513253, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3320792016461631, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.10022078146902932, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.25434273119344186, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.20519952157280522, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.39166755112117607, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.15991026977564765, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3236465324693797, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.08233847062654806, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1668854639288255, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.42010321376888254, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.05135254464623785, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.10818248451466282, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3258308765385693, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.10027955093430833, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3650503321876689, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.171833798351082, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4265037420578645, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.17393111207515277, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.39042812195808824, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.14437570687117765, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3403124594589958, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.20204254060396054, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.40631134079482684, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.020318277383243454, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2507351446416792, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4471643682678732, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.12853443501376743, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3781656409614192, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2009118875039034, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.48691094449139866, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.00517631148112868, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.06740002819965461, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2452462470568165, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.15998665872195003, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.35681333217176553, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.22922072303609867, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5075702211165173, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.19064689695123957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.36954921822756504, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.19591250914526429, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4012747453552514, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.14508364614975736, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.31907461937638537, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.07534587005281039, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.21764751288661083, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4696664542993714, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.24024632160930773, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4371716158103883, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.22508089265035264, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4850615114261917, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.11472913879621423, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0484266906744459, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.24913886287924247, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3141794892548087, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.11397867508647329, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4390501380282409, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.1785851272602057, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3800733399524004, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.10927209083864609, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3154924313217727, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.14399622191067446, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.40927634117587713, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.04825170735480719, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.2469647404663632, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.23918138501594022, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4342449335881247, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.13077065491742576, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3664151599144951, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0984296905675516, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3288790320536164, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.004333829482338306, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.08668716511436675, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.082669215590649, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.2792157437153376, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.20219794591777904, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.4267163836239083, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.18851320324917495, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.4118109845203767, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.20113943179758872, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5054929215592371, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.12371001489967776, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.35233081465372856, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.14334269972024394, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.31813833621829557, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.03336328330137746, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.2784134123049492, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.48776854891153376, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.14044205071392263, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.38712210510627154, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.27182145159884086, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.526070184366635, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.019129282930908934, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.07711027977872816, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0957971819913436, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.29031027439121476, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.16558784557611658, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4099467657500184, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.06638301361073934, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.33617826818768626, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.07088281524771703, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.1725752257112697, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.11414041086884202, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.35006273110713093, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.1313979824300913, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.349118229304567, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.22432434110392951, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3260751755857241, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4848127748471755, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.14067214182271884, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3683127018431368, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.261537968195518, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4670896511609081, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.009837503470338078, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.07381634573546925, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.040393483260585776, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.266056205197059, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.09337623404557584, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.255517984683644, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.23272041020266335, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5347837552430531, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11901413329120636, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2908877283991857, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.07103656838719773, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.27534420196130394, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.13329630208389306, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.326901495019388, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.08474505774044223, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.253832017325449, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.27776911552756844, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5207350087859894, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.17957474071770196, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3714895660565911, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.17878846497211381, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4805877835621217, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.021888464288139578, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.06880194424178804, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.07403099975997424, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.25305896973942904, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.11889226114628741, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.27239589447707985, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.15964995175974525, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3581305879558541, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.15593857496482408, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3832822126692406, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.03964626561854378, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.16338514690556735, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.056046675552729554, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.18841684806509754, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0103535778609182, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.1374348609613479, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.24530071385520955, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.39829793941181424, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.14834005339736556, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3650996903674865, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.19730454275995257, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4060233750197503, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.06643196929197938, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.08968389355416555, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26065548087048496, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1101341452220285, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.27185902677547247, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.14086598242600956, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.36137008859982034, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.21107720643690867, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.43911506176829573, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.03937709136327999, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.15616761711166294, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.12066885519467463, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2616326726997023, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.05438497632520132, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.22483031146316076, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.19772510321758924, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.363359576781242, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.20352428331068037, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4163814361305384, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.152083233596389, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.307389858154115, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.0019267822736030833, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2729231212296316, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.15873502699291203, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.37163016195847015, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.16084008820568224, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.49453446122836875, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.13827947882974537, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.4010585707766239, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.25209019490063744, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.21271493861618962, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.48032757836964046, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.13242778128862073, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.26030283597527587, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.49112782340858424, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.22218345206036327, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.531967559614025, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.24660313247404905, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.4750802403221604, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.008808482479470064, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.08172375369450574, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.027855314822211794, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.2743064672346355, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.1046814649445003, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.35929100435813716, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.1605078796467662, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.3607284417441162, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.054452721416611755, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.17786113214625052, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.07394430141087438, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.308455636822965, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.1462174255670787, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.33477501662684966, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.06361340947543563, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.23606133878828675, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.28045674084265454, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4517321696983274, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.055923960513901805, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.19457014898752104, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.23365039523187425, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.42556791226379487, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.006313131313131313, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.3270101704079343, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.12832094336767122, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.3205905925059277, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.25513503948815797, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.5313546358608554, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.13788203160207568, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.3107655646435926, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.1709984622318412, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.15045845033645844, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.35967967969652714, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.05137179048809052, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.2179007767607974, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.3089004315341498, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.48772563434127697, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.17526461622814685, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.3887795637263755, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.20843200459956857, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.42187376156085354, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.060410667564482795, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.04978921592425999, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.2697504713353615, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.11215442765734894, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.31037546676327293, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.1634286453435278, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3969874268891194, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.19418939219609221, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3901489832573322, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.048304077864023846, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.23970159686687842, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.20653927241974365, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3737641013916679, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.08226969152601427, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.15696885690938372, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.23244643124640743, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.4157655954514044, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.1872830229239533, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3677267744209934, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.13963559400140405, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.358561623108295, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.07846319110079712, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.11613581491070282, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.2960371403891785, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.10180741374280794, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.35728563956947634, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.1945423193070673, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4706222268986097, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.2080824447945289, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4427653693519822, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.096873931674983, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.357921862131455, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.11119610005454576, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.30792475044662354, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.07130512646301328, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.20474632477427873, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.46426130647037495, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.10226015509299118, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.40597878858272624, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.17770686403077657, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.48140657517707824, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.06004443343680894, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.2456391015238017, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.04398085473438986, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.2975413849030591, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.06928783103636403, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.28222804846457444, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.1334077033965181, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.3538599860218621, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.12768405545127823, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.27604666315815635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0780295493314355, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.24395345082956324, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.08696621894928246, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.1961381404046578, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.006322585449419441, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.06296222300910888, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.19041407809791294, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.40450917856802315, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.07176020589506472, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.298641857962305, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.12417300961365357, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.34447336960471725, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.01552617838217617, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.09140076236425068, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.09225450507547597, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.25320206287790814, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.15282474172999858, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3332497402594901, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.15757286670204007, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3806769190392542, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.05441383188454176, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.24018232621879906, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.1426986810099998, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.31786120425004616, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.20406556965912795, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.37608957302795537, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.05462016386637322, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.23421911777634782, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.40751249438768894, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.18085489460790177, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3621070250317998, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.17617182067763734, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3491313268402431, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.07124457091987033, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.19364253421963298, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.35262250988466515, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.10490012364788723, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.33660463773341737, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.11899511888513169, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.4352142502284442, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.12579975046393374, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.404341788822506, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.08821147859816221, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.3359502995810114, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.06336359180003494, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.24215944474690457, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.2516489827410343, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2340940710746067, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.483043611496241, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.14706390860967783, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.3214752326421373, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2054747730587689, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.45289260702352463, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.01050247985807066, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.12711751466537588, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.26095912281725886, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.9087119657256413, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.9560455759500431, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.9619541754104973, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.9780227879750215, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.6915198173552017, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.8970348615016391, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.7141488866263325, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.779657632911011, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.5940828417346083, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.8140638933937222, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.2864952055011831, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.47749051793940483, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.7550771065896408, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.9560455759500431, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.5696209108744519, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.7189523456157609, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.7200916222527397, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.8631206418308938, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.130164515743941, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.34629605922390666, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.16831858516266504, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4055923540305375, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.10548435635950038, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.3216875314121515, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.05430124534156916, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.19980447534063997, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.057343051243090276, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.2543177324262314, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.053828439630653514, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.17598984210619661, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.25623785429215434, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4677298196544807, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0725167639529624, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.29235386756607995, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.18224663601845273, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.42705463662730664, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.03011727853732026, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.01613385383938365, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.10556218836411393, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.2984252488377484, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.03878009660847357, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.18823804107102407, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.1048945915828233, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2682365472098696, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.06705104400125081, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.10364481526694266, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.04876338988182848, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.1659640928468496, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.07896475900705206, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2048108716476644, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.11105189309737737, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.1600171621099208, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.03852977679011041, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.10528935175350577, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.033289100271855616, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.01684375102771261, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.057124649943672015, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.12011237160584264, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.001959247648902821, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.03474890559758415, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.10986421427554392, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.14245697322261636, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.31264665723789214, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.11764506340202335, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.33221255414061107, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.03174814557417323, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.22677038600489324, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.1002259437998706, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3097950385521604, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.1639463523594731, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.35760502908172076, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.07274026865105182, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.17148554966021845, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3250663061067276, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.00987676385026907, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.06183822132049342, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.06896661690628482, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.25550275941289924, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.18966584330202407, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3677579424388819, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.008676004985440106, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.06213735883437277, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.054063951849895646, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.22471751395789294, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.11813127408984, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.33360638121733993, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.12452426344763672, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.33397340303561174, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.19483286033292496, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3818604583347797, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.15375876337917632, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.40619134928299927, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.18104835024815905, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3646278830106281, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.04114837516633565, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.24054900896535664, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.5113816914630019, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.1438005681117966, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.40809859931270137, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.1960515347006078, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3829188025031592, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.03388606284285434, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.03903463655378414, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.10483721246894277, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.29429054338289595, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.18262512815371146, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3793372141796691, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.20118989409590474, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4568193159296443, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.1463728853883045, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.31232598846995213, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.19571475348790923, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.39920398184186273, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.14494034972679515, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3591070798808878, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.06655131935655087, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.2586761509449017, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.24155574630199578, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.401555835867115, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.17968751167959493, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3532466621095985, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.21212476816911802, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.45277145500626653, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.07535055465501972, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.04639980294552743, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.21165282585763617, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.18601317791265554, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.3622769931215066, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.10134296554489586, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.36352625085241486, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.056826643919713225, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2266491488847452, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.15537992498552838, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2883334105485886, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.13868172938464635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.28335103320234567, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.08456930872418965, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.19628672577369188, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.2015417872146417, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.43922925250217454, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.15957517702086002, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.40743897447570776, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.17592438916838554, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.44324094214107496, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.04460344454473387, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.05185187637560759, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.007193841255268015, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.07281943700425128, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.11586860285558973, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.34426859851631064, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.11537817464773759, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.3235971362772825, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.06766231174629671, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.2969445272858798, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.06136044368743127, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.28064577977026595, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.1270166435836513, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.3358025326946177, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.038084270974099985, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.2506085391461044, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2763478747844821, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.46667544697904584, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.22886082820773523, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.44299807792068047, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.18540761881407403, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.44065653401882693, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.008002390264131916, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.07894790558653815, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.050599660118273604, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.23384318295085793, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.15459585401418227, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3893244765025937, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.14329939975046438, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4108923582650918, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.074972966125329, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.24626763305506796, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.11125675698572768, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3396260733677202, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.10292237126920266, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.257640060451522, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0059334149311646755, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.08228836041742828, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.23674819725709428, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4112073942955352, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.11337534719923378, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.2994620506111809, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.16752333089459695, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4368242584300212, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.022171554761884083, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.07208407750039555, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.07917353810429117, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.28227859195431576, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.14039526843208108, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.32365712589934936, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.17760506260243636, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.3876268199476132, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.11133338686962291, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.35573855512815966, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.09619976321707868, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.2711236080024818, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.19766937520219646, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.420372038303943, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.05903450822297854, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.2340338200887042, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.19395070203502235, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.3842790377686493, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.17279545961951226, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.47435984974317186, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.2355775714536014, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4751548693556032, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.07535773624684425, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.2964854867084402, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.03901477466571775, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2037012862266554, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.06637842065802063, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.21920740076602796, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.1449164009012341, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.3465082189188072, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.10248481481009991, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.28479391150159855, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.07015085442074137, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.16316416553915214, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.17794585206155172, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.10515459794240459, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2595574477657192, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.19278662059992974, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.041456611364145475, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.20732523199036149, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0465445269655205, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.0737413158125885, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0417877035451695, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.25972967536309605, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.013978194191220837, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.14269295464324133, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.385707323503979, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.1637184523716508, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.4161934605653721, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.11145596509983458, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.2726799212917933, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.14188725539444494, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.3320570749305626, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.016098806505679464, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.0896538250999998, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.22297147454167018, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.430433050701162, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.07943825618517553, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.3357496873199587, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.19203703944580386, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.4598954047199655, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.04815454684943356, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.052921142368188685, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.04802396750487762, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.2260828104618917, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.10348510007232478, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.262284085504438, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.2592475459062113, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.4431845520770858, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.10490171797384476, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.4224966678718647, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.15853866673457936, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3919119546822024, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.10228767876406449, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.2563726374134224, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.06599446653043191, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.21965782403107392, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.27813241413261985, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.47379560516065056, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.1639110243114347, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3643799443374109, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.014207387844820368, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.15411273725684005, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.2147416276784343, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3929468618613229, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.05685645333743278, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.03987881150734634, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.23923990018919683, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.05551337802991313, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.17127810877064262, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4177685028472229, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.21087258811486068, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4182975936964002, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.12272317458390947, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.35138790489746474, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.18399721582637169, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.40091595611954, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.22790946436828668, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.261400078784685, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4246430677980096, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.21299315183772102, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4404655669054682, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.16495971085520575, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.41762478633248973, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.06059807419181428, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.09276333478547918, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.31791450398384064, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.06570335870921905, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2479447435027854, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.11560620039242474, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2786890063705584, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.09867858411809763, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.3225529873990059, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.09848268370198501, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.30734497874640454, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.09835335087704755, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.34005102180757063, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.04721429386335365, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.31075905542125126, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.5202913427171048, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0794619983812611, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2908372726398948, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.027719195525236065, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.13685668579971297, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.20470875192232219, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.42504796662718713, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.009098982345428752, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.08310894234642512, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.04045381290021945, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.1727288037490547, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.12370218124034608, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.3512013342182375, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.22396412213589117, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4759025637784212, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.15627978895992434, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.32774682416221296, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.2329029580518658, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.16942168323911655, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.348866040524313, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.10720803875144091, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.2299648095054087, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2687134687012366, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5054807217503037, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.18957610315835438, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.44689148142047136, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2179258604235174, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.49494036869518365, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.010596646407804984, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.07677294813171638, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.04840317993719298, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.2527418671450915, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.22220265603625808, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3850042924274655, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.23925860034637106, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4322103094292488, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.2340940710746067, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.41823616908031946, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.19992538617203864, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3997987536424768, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.10781100627978431, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.30457905210205677, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.10326096421189704, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.279108907377594, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.28062519500317484, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.48566527968707496, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.19970444443154955, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4423950546113178, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.27409004101993995, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5284886834986656, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.08236287459432604, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.06513324878792714, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.2888842986482907, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.12790043463054807, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.3251137272879362, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.20381173318192514, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4374242147608937, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.1863636050757979, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.38327347200836553, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.2266753254734694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.3852115955641439, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.1499204981771791, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.3722568874154844, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.14104469432134092, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.24275134970746923, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.22419461419741465, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4592127766399572, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.19211812593555902, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.49977246377030776, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.18531976869758127, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.37963243794803875, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.01233876574258688, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.07802251833851097, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.06251587118034302, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.2652243792907702, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.13774451171759725, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3392358805928605, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.10995304272182382, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.31516290141130093, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.153929566721589, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3608257907466213, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.18480496206296063, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.36919372543608214, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.1730275917150875, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3674830543699603, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.013515578278040412, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.117607119705924, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.20326791526595261, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3619867553629671, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.020812790241140106, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.08148147559856574, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.14424669882788801, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.19255699527273906, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.38859820242385235, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.07808637566840695, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.07616370704276611, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.24809947007268568, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.09576088800276077, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3041295470213464, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.09587088737227797, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.2616214569856814, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.2623913464686138, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0964828084451256, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3032949665048769, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.14518717773336579, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.1838327121162663, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.410915242672095, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.08971305206542297, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3602440247006994, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.11918841281221274, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.29606868731001496, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.00580156080768459, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.0656391052988084, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.20131698736551876, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.09302909573829315, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.1930745826064226, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.22277635660975664, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.4138180006822518, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.12683453413302323, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.32324923216936663, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.10923883271644276, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.24764809403476246, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.13767897792280337, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2518191271414786, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.07620016398847337, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.13944355344056922, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.18493395776181829, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.40795153012607455, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.13138966194745244, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.3198162160638907, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.06438908961410146, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.4167663768476074, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.013538368452335667, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.04880564940883189, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.10949933984953873, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2483480632420486, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.10406813778823464, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.32304117956922, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.11433133814822793, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.376685707847211, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.16566832287055458, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3899280134984665, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.12219090986337702, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3731186900607078, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.10132612615353878, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3332972208005526, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.05410002607065616, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.22982437342911913, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5063070913117304, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.215820087371041, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.43525547120246666, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.1722310583472658, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.4259958309795401, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.005564453544831041, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.06864478605677955, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.06011462639386161, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.2847809875523831, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.16279177723066465, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4102368807414641, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1418972100477343, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.35549463565231143, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1340739087751559, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.29651992518206227, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.03946884506458087, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.21197738023216034, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.16868089464852234, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.34843578398752406, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.04783137935732651, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.2178243901049148, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.21864626600355053, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.43449793297089706, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.17247694128916682, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4371568532396031, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.09310860627754827, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.29711898112528934, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.23063365203643552, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4976085209077489, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.008193994475617358, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.06416871627119347, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.04977881551947034, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.21589989285424255, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.11739016262053983, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3243598686623998, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.12391818909971714, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.35690404186806807, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.1293238011288526, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4229943657542825, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.11994392827207193, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.31204692452038146, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.22226034707641953, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4087847602622323, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.07794901287174041, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.24352940914340515, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.24693288345135292, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.47256029886426165, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.12875681763458682, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3993394251736514, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.19652148611100978, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4282480517889739, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0351824757158749, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.04596692032850378, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.13010283341659237, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3170762221054434, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.03625427565378279, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.07306837018670909, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.39671144662530883, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.160940602711541, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.42923242729167044, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.10754901074081791, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.4366819145999934, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.08724562123777481, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.41003179675795876, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.2316040185438685, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.14846392828893068, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.36181149063759965, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.1085914542989672, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.4077165098927507, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.4355810088704621, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.04925147838126391, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.2975513707144851, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.04819270896239086, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.262682816076646, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.11764942628273296, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3661118918972216, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.1546502365264458, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.37662498358584207, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.07247369212693866, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.14796497932000655, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3628325212637828, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.04881966677808663, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.25763198229409756, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.23012496435016175, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.47948987011733224, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.14926399654342473, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.428199968972176, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.15248248205315285, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.44007880925740467, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.07284789180910362, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.2875325046198955, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.16053664506786586, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.05360431433749398, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.21840614866685698, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.146687038685289, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.0366752648089007, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.032273297160432, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.17740798450045847, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.03334273639438118, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.15056586105615624, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.26561679924158815, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.01793435395643411, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.03789062221672838, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.22901989299104952, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.03624533112227059, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.13708025640641405, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.10180668728147267, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3387919511268085, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.15878190990009447, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.4717304128091511, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.165417615101112, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3723049467309275, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.13331291985681393, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3257699661757467, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.13692710494817806, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3787409988662885, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.013310536248432623, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.09174273696529235, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.2985451377718839, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.5482387728422947, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.08575430966320656, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3288420528759681, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.1531944282330758, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.4851467956233017, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.01626757442125486, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.08310080113327457, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.03791800710695544, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.21909149207325135, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.14493605424503186, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3501358359880476, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.1270407903803617, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3551178453495837, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.08711637171502758, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.2954967025562741, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.16553550114612764, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3714847127120771, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.1391202023366056, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3301375941706545, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.07640690767869955, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3081247577756139, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.27425373620095284, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5028086616418246, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.19319119695627468, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.42057504262138584, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.15767660520106525, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.4246819037324454, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.007158196458243923, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.09845213748343752, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3055875344104902, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.1502843121431226, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.390370937242866, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.2089790450123036, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5020912846079854, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.1224795031512657, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.29049145213151384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.13000733187642288, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.34493406005242266, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.11869174098838171, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.28873225679782916, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.06713856844474306, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.25037871529772476, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.27359525909456617, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.504913886563221, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.07024260582912173, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.3102655290838244, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.16352688052951744, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.47198435779029524, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.008423858606448082, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.07656193246033283, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.05416888177698682, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.33860046879314426, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.08810199114550488, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.25421876554719885, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.1723666822203382, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.4164179694728134, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.08174592824469444, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.28125186470074287, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0821491619656783, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.3276253201601609, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.13455263708735177, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.33030019783272807, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.005072422648731631, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.10225328551984775, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.24332752430414528, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.515325177740931, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.12045545488000142, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.4270379715218122, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.15558277244135618, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.47290283753021434, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.008715599257989518, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.07480070273160805, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.21138349896611064, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.10716023124329743, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.33305109264677923, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.16831858516266504, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.44925788158953217, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.06005500834321576, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.23500168157276313, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.264506771329212, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.155483560195194, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.3577679330118475, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.05134602618173752, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.23119628922687724, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.21846037005111443, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4323411521204525, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.07969115995241391, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.2886462965890724, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.07486383088392089, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.22899815715803162, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.17322793980422166, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.3827655782429965, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.013967621194811793, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.090985176350426, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.13648611015535222, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.261359122933737, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.09532344847561978, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.27911273015721655, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1620442038279161, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.44671116906860114, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0808656459604844, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.31629942918298065, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.2156293226810056, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.101592292592622, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.27609002958205586, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.046672540599309545, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.24102097474536338, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.468594330157441, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1297242489488828, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.3868393915563033, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.17933350957537242, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.43699109907071665, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.011127070300895376, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.08758323198230493, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.28297301636963595, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.10362141065231315, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2976713869608838, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0535442755496515, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.32534949147415587, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.10502639606076236, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3443858948320673, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.10787346966969634, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.27908818408460184, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.11033135811581492, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.26224674138317167, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.04113753433682409, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.1528625963145929, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.1552004627188075, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3784281204778653, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.09593662885334717, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3241465595416165, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.10116506361132198, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.33164984003681125, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.007131140141430805, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.07998365456424056, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.056716976776784944, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.20621198166782573, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.20843904443769717, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.41987041749209575, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.16892311117272107, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.42944637897474097, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.09407649401384535, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.24845011725491462, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.1562430189793864, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.3707779359151589, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.20050198179023138, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.37497064056062096, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.11693769283709352, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.24807742053452883, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.34641276682746075, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4843161200028808, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.2691093449283904, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4584117553286684, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.12808121127467118, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.2940358902223427, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.19341593401394422, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.45750754600729443, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0070813413245724545, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.03917953408872804, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.1085943935733644, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.3150869338737483, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.08853913752009362, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3699120467510799, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.10565962599924915, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3576984412786932, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.07750873793282746, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.42519667805364314, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.1993887445781869, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.11740286850698373, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.31504011454506725, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.029191792762547256, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.16799318698762442, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.36562698117819364, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.12155966905167627, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3864314923842429, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.08288641431941539, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.36030774861919207, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.007498600494904601, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.0920325108581037, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.004179571578298041, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.07351837954322755, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.16306869473764823, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.09530685695259108, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.324860923353352, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.06261022269185519, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.16543452283536295, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.042425836212707804, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.1532311899154315, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.07314384162266307, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.17064167037500447, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.05263353785583368, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.10555576819736473, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.08870141712804926, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.2956750528912434, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.04316074561911492, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.12179766740107663, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.04960414486693654, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.15447083970732592, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.003598209291387237, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.053420519861741254, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.17991947799734828, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.10417702105111015, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.2840081496768262, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.22195513186933313, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.41825817264037385, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.10512531898680018, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3280524648390448, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.13250044494118035, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.27178201004990943, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.1683751554206474, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.39066705722063644, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.010968538854373978, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.10609012911479784, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.1814330257149915, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.34300085767735583, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.12977918792751478, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.351708163232579, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.13462044240543036, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.38599010799499406, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.09645717513252113, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.05840767670858235, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.29437564634854996, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.06549365852999947, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.19948256778755252, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.33171820256493456, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.5317418755857823, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.13801294746633136, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3369282304053488, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.09320127430844248, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.2411795889403124, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.1383681036030409, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.349504676054034, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.04693215248462417, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.13798005261606958, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.2358588586070764, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.49341195538644056, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.12439394521251071, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3794511361786166, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.05226233169864726, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.2706356884347931, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.17905278399134197, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.435154701956904, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.01158212560913509, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.10215743793929438, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.06927836159710253, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.28632181397023265, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.05045310628409221, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.26506847806653416, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.1177915530693848, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.44418786085508993, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.146547094309055, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.36840754531694414, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.04532894170735517, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.26844337122586925, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.09624394213587038, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.2754326129166975, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.04595467899356206, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.2060042918766775, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.22263404925684163, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.43676191178353707, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.09537286664645776, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.27089796392151044, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.4004950959515481, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.013176168036187562, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.05448980490008836, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.010248885552626243, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.11174990539960772, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.055310713185595, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.25742908684385607, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.20576246677399845, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3789603596807018, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.07528967210822063, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.2659777826374339, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.04876338988182848, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.25024446120257093, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.2811165646467243, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.08119930104030995, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.22749421384497187, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.2308676152532237, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.45501496824447146, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0559808324315414, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.22623967162682507, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.2487940867712376, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.12980630299792445, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.30447471429985073, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.005458377934582915, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.051109671088680884, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.04977881551947034, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.20431472158182293, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.17509809383946048, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.386834770913833, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.16322400014183205, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.47216572692709596, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.13050295514132168, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.37331190108047335, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.08956508021078231, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.2831884743240396, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.10389284801515357, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.37748572050737733, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.010386843829287925, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.06479288155356677, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.23300319315350754, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4491554038721914, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.14673461496683476, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.399781941109151, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.08038264410981451, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.25459802441879237, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.27240647173555915, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.5364888216436483, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.013738103741112312, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.08271100002634647, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.04550552601380896, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.2881361840023623, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.05116952807627418, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.10314036721569257, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.24963395554422432, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.043321633865040066, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1541835268049213, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.12155067150546772, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.30434034369814117, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.10171104705932635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2838287277443614, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.04322037497124764, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.13973249426000348, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.1909849699402812, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.35574761271391786, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.12335871645688117, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.35359462961003574, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.05039339928845027, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.268359380285868, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0032325239534376927, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.05217514663240744, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.050132348463440425, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.19059644113563182, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13410301071131794, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3942932268034351, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.598931508663349, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7353063745802827, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.30677064886592076, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5308555945242818, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1327526847508867, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.37850602486495205, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18405035438430847, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4142901090120915, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24239458593560292, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.32069132319909655, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3857436691295343, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5750224388123065, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.22478613858269392, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.44348101018104913, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4026159305424288, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5712560131047175, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.183687049781416, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.351911486970854, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5181825846579515, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17328174803055044, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3178268797869574, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4262221594184117, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5886657414856064, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.286608441075188, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4579283646292802, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25861130592298187, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.39452644092432093, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20379250618355427, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41085414309816914, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2990226215771518, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4323734152924571, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3621517589760531, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5866873582151947, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10434360980785336, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3012789660952507, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17248715680799764, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.40043565243219187, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.13835317113453516, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16343842313572918, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3986641525285075, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3315037521841549, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.468197879470805, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5792139686527714, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.25798723088167685, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5244854229988815, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.460474309246715, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6213537794704693, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5676965183365866, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.003172770121174655, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5834549494301647, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.22218130727359342, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.39929356245904674, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.46092611919700416, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6365915338629015, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.1690979933029136, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3751861276375209, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3344305108778801, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.37017501464955627, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.3194331635465395, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5031092445628172, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.2974074484950165, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.516333943378855, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.334422418242443, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.47577086062579566, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.4482907809719588, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5498272118133005, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.02467424260792568, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.21902340561392236, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.43906671679239717, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.26538706048179084, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4982627378595717, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5234484809182233, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6658297773613274, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2840563956846642, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5110250591004448, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.30007504691018483, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5153810823423555, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.14074957769288798, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3750035199199742, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.232738415750697, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.49618971681248764, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.3716332023564544, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5698425037615289, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.3488611533620711, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5550499651473632, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.31883477089875656, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5510450101159524, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.038236956722392024, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.31998097041178836, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.15161074985415177, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3796830006266126, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.3815250264738168, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6516314751979607, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.19920413481788912, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.42537796926163113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.189902924205034, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4072184389907138, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.18710260593933364, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.44334313717706003, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.07757069009917116, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3253161209971999, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.20292918891121983, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.43895593415558826, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.22168992033645996, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.523689661176845, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.2722704374402053, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.596004654894533, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.01008902035184167, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.10279947040838337, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3569840483632983, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.18679710353734788, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.3876457319870774, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.40003810431098236, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5899097408105687, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.20401796878756984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.43317630453631556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.2097387761551816, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.49663301508497226, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.27067168022307464, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5439625482235064, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.2826204057042236, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5043062352893725, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3901529878372595, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5914927912573461, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.29588994069727786, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5527117669081858, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.46732353406180216, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6059276585345114, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.06266083709457643, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.36565527196849945, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4882803186347697, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.21812881407613688, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3598346059855135, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.38047531731529327, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.49485723102957346, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2044887070217883, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.38471585132587544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2309552734743087, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4672309378181727, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.29886658673327365, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.1367498402979849, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3943841419148219, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.22848523472845572, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.47155340591403294, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.5676006714726635, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6880701448812352, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2974074484950165, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5121581247515657, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.010162846529607748, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.21947959999379651, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3226457008913864, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.24586918158076287, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.4658595745396681, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.4489235959690452, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5934678825154104, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2980504190448601, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5101268920225042, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.28800869328515505, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.49348678623542436, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2429163097293302, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5044329486461447, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.23073085454808062, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.44142087654422146, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.30167234272902427, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.543245170223285, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.4406612884550454, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5676112112992767, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.4476950425126913, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5932980209045412, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.1483315516064897, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.25249051585915977, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.38558450790399557, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.46832763312452297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8176110134774669, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.0067104198717751464, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.9025232868361638, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9169897590736298, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.9709835434146469, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9951728990866464, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.9154051169199643, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9757471794927451, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.8935248372106969, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9404428602061264, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.21039673882735752, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3872019296036794, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3642482472579296, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5342538783335161, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.18831933500600306, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4318025704181776, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.4439623527529193, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5309137918519957, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.2628849077177109, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.17879309995151985, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.2568045428196672, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.30815875749045163, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4611100423417512, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.6099084961389527, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.658015760514539, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2464380578618272, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4278399263644655, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.1879604201975219, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11956615218925931, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3305337714496588, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3395693620772222, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.28406136898728457, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5649283064490618, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21544027588567594, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5040038440508637, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.28552127890094825, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.16448947606185552, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.11823053204772466, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.24615921057796505, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3931991982536581, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5762809938404015, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.3491726680217181, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4946434087697324, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.22916123454514536, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4192305796685782, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.03586767012087445, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.10393938326032184, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.33864898055191395, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.22556860731509948, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4747086049005634, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.23843418577408987, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4082320855803597, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.26970223719007375, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5172978597562362, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2372622545962587, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.49004864454711367, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.190140357671548, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.23647235972003527, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.41650969469918997, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.25863626048999067, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.501623272099173, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.42662911848025076, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5800596652250789, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.39420326688847324, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6371076304605184, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.030501743754356173, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.19352792845274666, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.3925864519770825, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.25449674462950855, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.44805409822643144, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.49458876622696707, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6179893617801274, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.30630098078522544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5439056051092116, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.3059872016765634, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5499457869553984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.20076347441707354, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.49625515445592083, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.18665948437666813, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4437597552815582, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.3424885275916587, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.568309937115554, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.48670274592792, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6717971788322309, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.08002354055277362, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.417372155782838, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5043502592801646, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.06680433144407034, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2919280798407827, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4076170046499833, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3312570339636223, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.45442661484375735, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.41520313827696, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6485212540886613, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.19850842371858787, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.43584341835040474, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.284161309400485, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5096201523229312, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1134451991138546, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.4186167762559285, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.24088562704853508, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3796021685415706, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.34401346933337906, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5495898550305974, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.43874832905672956, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5882858748700781, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.26652403565303173, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3270207865532903, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.006769280526888359, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.35465713644381464, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.4911561718424494, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.22744906705116497, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.37233302529431345, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5112697990822607, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.20170335119323748, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3541251997977811, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.20170335119323748, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3885781003640365, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.19139378056089276, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4390566098420477, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.20533223830207933, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.41944354523254873, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6003681413895097, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.28392242834976933, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5447934365522582, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.21644298243944068, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.39638009285990555, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.0013316837154984055, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.16192940337079562, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.2536107727939302, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.16542259679471108, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.26975832150444645, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3857901147929391, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4926358895461277, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.10508106635796587, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3182774828667731, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1214053825777097, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.30188342402741686, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.19420534060688366, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3642322841308566, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.307338019036143, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.260711748598298, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4104518639082048, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.34848192645275233, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5640398262082806, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.2834052290575623, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3513608621054937, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.008037190855333259, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.09960206740894453, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.23680099011195122, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2831988281847858, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.16885023000999705, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3897135623573608, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2309552734743087, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.43975656978777905, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.21883928293594496, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4140411856219547, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.14270596284245182, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2812419410574613, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.08966592262979808, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.15649677978231225, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.31280763439438314, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.518277214190325, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.39448834349896583, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5955979870734485, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.38006094126945456, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4952333573221428, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.08451648022462464, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.1919704825961155, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3465066144910527, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.21877512875558908, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.46843499948065653, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3325026294099889, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.45805631044287126, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.23530033724858213, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.46208607300298377, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.2936164862319274, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.20512476044697742, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.41887716212519804, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3001327916132617, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3729157997624686, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2329218075757412, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.46557005506050453, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.364475284252601, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5303078856093503, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.40570535345933584, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5128999889612808, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.03788280195139692, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.18426708758406696, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.28632666167603604, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1759696284842668, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.40989005404786566, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.4824794737945071, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6479459161283603, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.37284027455688556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5528347504734102, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.2699951684630893, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.48283570465443887, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.37458689678657137, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5282052116558258, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.18272411487051296, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.31610070189880046, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.38358255514637035, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5669740223554237, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.5368572660911874, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6191297828676535, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.17472341341804662, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.524685968336986, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6168947391572741, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.007563050858412507, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3245592826703331, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4961612243992949, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.14722675403683808, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.28846790344058515, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4656455050518963, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5477103600632085, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2887308472548599, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.38846174119508314, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.13497849469044018, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.294518464464907, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.09207365845406566, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2745662608288741, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.21555651352032254, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3334657398473605, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.48862358384725524, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3325507240998139, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4074972493988868, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.35372399264817345, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4415325921468054, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.17383350569405645, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.18154339087127422, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.32445288009194484, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.2542828011834812, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4695941026465371, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.485644095022506, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5923993450097689, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.15487293534817623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.39293494862736383, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.2746536544630471, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5102501783603998, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.18154339087127422, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3913469689541562, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.33281148468111865, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4458341777155295, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.286122396488242, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.48996353650153573, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3876366843353173, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.49788350946214466, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.43024400109694355, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5419412911283815, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.00801785790731701, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.09885362316286796, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.2922887728653336, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.19659284558894802, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3439604955527307, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3150760288937462, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4551575101683354, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.21741853044139284, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3535910166292039, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.22171131219306292, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3447082086936202, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.2006070365475092, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.35578462620981843, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.21812881407613688, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.30905257672100556, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.2614728375659921, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4319733175313442, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.07757069009917116, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.349379547582856, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.30548599245666574, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4679798467191344, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.014790009967292654, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.1352102459252932, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.32096940912916827, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.5046613014990851, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6377969619576389, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.6485902560215636, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.7627201392474565, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33626819961829335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5466581859383387, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.4947540498519851, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6678485036966576, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.13753714471937797, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.47439937147594774, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.5037566723027213, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6887059730011686, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.5401659537103226, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6799874350995705, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.5639943531321595, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6640560919035684, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.6878626650756018, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.8027840472006857, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.009344129172007202, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.25070100037410625, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.41914475317559957, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.20326213373677707, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4211311971791892, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.38713346831820944, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5376493568188783, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.32000331642122953, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5480591855923784, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.20926433421787555, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4046472012888725, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.15758218479424427, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3026681380066168, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4321096865782781, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.30296887338419454, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5234904895800095, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4096636841225722, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5278686169310903, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.08419519560443713, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.24974396789011238, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.29040471938315554, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4617994565642428, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.16484140461834484, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.21181203648983093, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4245081340557523, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.12890680068769322, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2478022357548686, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.49124012500448727, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.21132630077912357, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4175670766052166, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.22554684328666952, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4891635965943922, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.1370370495710889, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.08013992490936363, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.1552012946702491, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.293968634178725, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5129012262464672, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.30027814350488985, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4842037100625574, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.20881729496822948, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3442652041844856, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.14470626861490504, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2566358039205211, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.14440270272056518, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3829771215415724, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.5543498698280007, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.7016802877815009, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.13108369255325433, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3929302741911199, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.36484904083194636, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.568282580396246, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.17997291109158148, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.41721274496653427, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.16912873274521933, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4587620543754354, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.4036650481496061, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5988798086699901, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.17864866390812006, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4085144222648808, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.4333488014632613, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5965589158023283, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.009743563745649522, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.31745891481127153, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1667112120846934, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.30913125513655043, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.13203823352287472, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.28280767234695003, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.20174045447955946, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.33729298835089516, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1667112120846934, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.3259291852615986, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.15756751766261828, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.32067005734881104, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1716931385864913, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.3193195680733763, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.1948024190276591, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.19454290935168922, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.40563779058474464, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.20365290006260964, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4327934810550205, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.12475528808004005, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.23623824056754247, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.20455275179869584, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.47815735761186096, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.37420316460821246, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.6481907872475802, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.10878661088699644, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.4101850467281004, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.14636985946104297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.42477479000228696, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.13490983794909628, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3721166387799897, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.24199061099369143, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.43083710197985886, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3821120008009713, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5899756751768851, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.33904091445927403, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5149735242342894, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.06556262572922589, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3074762344614454, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5504296088375491, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.009755969152501744, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.26351629170506197, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.2280725846401638, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.32365707034585395, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4584149294578286, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.13894362470892055, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.35710461878741834, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.13763666698607552, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.35163868522447556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.24113982759255023, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.22171463088948726, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.2988062022268337, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4421623642899925, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.34420876516527255, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5068918436131059, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.31461500509304885, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4233526729436835, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.004693571154544651, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.23316243238650552, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.18945235333331134, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.3443950901432381, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.26637160423927314, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4646735366228476, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.19585063466021865, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.37259242197674974, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.20191019088262566, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.3773476047938683, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.15487634919018395, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.40723956344919005, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.10067881517323463, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.3394719609522645, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.3311961105131967, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.5192887771709966, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.24263763794008045, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.45834072271970505, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.32018866449570155, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4142777372959173, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.008115464234465965, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.17226307014894685, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.2929617403109482, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.1581734375963556, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.29855796123914957, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.41749084544527715, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5512835123193889, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.2661644792674617, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.46546633658762687, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.21203547746686083, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.45610528927175875, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.08644237346462266, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.32003978062314414, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.1602999168643203, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.3213856523369838, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.5227070077068928, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.31148557892944495, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3867464805058363, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.3688189810109343, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.4691915858995521, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.0465791689548439, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.11175354227446688, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3298241713743613, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.16832254701348195, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.34172735320777375, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.39245477087067665, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.5302932215753233, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.17171159782066198, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4250539223489113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.1718526210271666, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.355144746174474, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.18597468573870948, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.3585545206106179, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.2543269102041851, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.45657979346800803, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.27593704297838784, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4517230506106012, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.3203505804334101, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.5201747871509939, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.18367196864850696, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.36528750965938445, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.1642384866095333, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.4097157364333445, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.4592265081063172, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.1567864814929981, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.29896910170538116, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.41423657620328247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.1524830877429947, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.20357681430297922, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.14691264038600516, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.09802862511942351, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.18018110949421698, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.23996024992906517, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.3763392895892706, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.1414355019095281, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.3460029262848581, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.08747015602038587, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.05963570658750385, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.2115339554327366, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.4102495208939548, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.6896260480312464, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.47475604110292025, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.6309823582859546, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.2685172542195998, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5385038880406502, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.25284666735526534, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.48906659909069483, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.21208798872596596, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.44139630621320486, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.13801642938000075, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.5926964880100889, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.7449340020430956, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.3537581250657245, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5377035718944764, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.00615279366159919, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.006004997316698418, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.33636994463223036, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.23511486401816076, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.4879376273715227, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.46676620029268096, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.5960578086281888, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.21011665246752942, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.5022122132107005, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2372622545962587, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.4647111452373025, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.19849167569028262, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.12206082504968152, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.2550055792545815, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.3821120008009713, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.5766197400237346, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.32263107476456176, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.49456548094666447, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.368476710110977, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.45696515917745634, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.002730707494799792, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.2931120472907597, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.4496606373231774, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.23892317781280908, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.38939729384213495, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.3481041677117235, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5683248500873576, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.2931120472907597, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5551374467086605, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.316948318594896, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4848206539921205, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.25029077088688034, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4952931350872444, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.37249688769592515, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.5898027592815512, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.24750028117795922, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.506555887771498, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.2816170941913842, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.4822027054198335, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.15419283939507827, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.3697607442845809, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.21246986343160065, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.41946495258955513, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0935071820875984, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.3104146584296793, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.46742290550462806, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.6201011845566285, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.312650266868888, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.3779052914818611, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.08852818280995225, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.3389067344533533, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.13841356129663587, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.3749458691052568, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.6457225344420294, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.24066493791066654, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4042037876162315, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.198980950307606, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4829816647739418, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.006459948320413436, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.16144218436085053, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.26044492723898, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.127245399039237, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.08385867792924734, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.1131435241082435, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.09051634336084007, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.0821783833049982, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.09398807997403576, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.21457149286802543, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.2929404812443863, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.11152889019962353, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.09318017229533021, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.1130888847477738, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.08874288018896902, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.3955646912745367, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.5363251034746059, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.26460159523593296, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4803700055675181, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.12561164303321054, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.47038542160135094, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.2579623389364088, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.5445453384696608, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.21367689804137183, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.41730988147513415, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.1258504577643309, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4394273483551192, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.29217369652859104, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.5438967020554117, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.5060596870227302, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.6440208597098943, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.1768429537093963, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.45965854198421413, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.008135271427712344, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.22821898087604608, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4078372564340451, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.12748893606628386, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3557730263992019, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.4174381195316011, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.5467874115748124, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.18187597339521155, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.47371700399657607, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.16537176735666792, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.377850748745802, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.10434737222357285, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.3018898621064535, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.4538018546822346, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.28525161483357986, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.523502554802427, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.39141851975461767, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.535715042850057, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.19736837482852188, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.33390746942953115, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.12309701217461798, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.21012928594872415, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.18399961762411743, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.24762685010055863, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.380770036603192, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.2372622545962587, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.41017604192890195, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.10941964146797248, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.26824334216983386, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.24960845712450644, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.1972799636982706, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.31568280470164284, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.47374051221492314, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.2511257790821319, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.17669429015388666, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.4466982237235162, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.028259642508149857, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.25363469927171417, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.15798724534926178, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.09458362068147118, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.19112709920459806, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.26917712505386043, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2839892385105806, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2145552140989996, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.10176289493427026, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.1958289008987353, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.22495130162065566, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.4290284967042129, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.14913041249319167, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.40440131346361496, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.049444999923640494, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.13746950527152393, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.025140789606938285, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.07209117403380154, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.19964955525867845, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.11450137919698138, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.38588319985262204, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2529258575884984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.43636064973584276, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.2206598690689583, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4671100153557676, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.233464833213293, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.4707402347794446, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.17712030567063494, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.39640748418199956, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.19528111792372993, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.33640339785796086, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.29628055533074565, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5608629839236193, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.3738380800497378, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.5426797573029211, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.23519540953587326, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.43302228498495166, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.17815505223677908, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.08675107600029897, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.31559446372647004, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3284807347106534, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.19276506991327308, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.43458455506290555, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.28127656729267564, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.48992770341073005, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.17027553201166185, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4357050316091199, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.2843469008409722, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.2316846840057159, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.14646977906722897, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4187706995483084, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.26460159523593296, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.46213112933303646, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.3104946263254643, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.5229367568544043, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.11661990462712164, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.09360571337807427, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.31554500596740015, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.2584188371476316, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.37301467093894103, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.4094746713677566, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.5127629442173787, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.1978740176644931, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.3475150762120378, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.1652327470881162, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.36089199879242, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.27241531995898394, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.2301408248787551, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.27399342653607417, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4466058864179787, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.3391824705480895, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4674445814701926, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.2623653043059387, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.39491069838469317, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.03595864795288658, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0983790525743442, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.28375777588367945, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.12525435825547931, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.2209612814301191, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.11872519045542135, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.17676570487933688, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.12829442435801275, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.1254577221421089, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.35339573017922915, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.5732827403734609, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.13551424610107243, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.07442538499398103, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.11642018526341193, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.12864673270139967, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.1415901806866318, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.12467595443249284, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.2225814067024197, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.1386200663966604, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.21110981509198895, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.33555397124550274, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.12977351408339674, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.07868103002431528, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.16676572906614484, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.32290710008434653, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.17085130234342075, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.16065348926853595, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.08308045537204375, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.16672381607546233, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.1507724411705623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.3882065230979266, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.5170531810333934, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.27850162207652013, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.4893867900242687, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.21492809973044594, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.4460691618745447, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.18709023221104107, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.18510871554566785, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.2971880635965443, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.2105995600117642, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3893190034495383, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.40665943808361543, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.5389281530404847, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.20298407172594948, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3472991039100052, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.02525776964585973, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.25984404704136116, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.2686424829558855, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3998516011592878, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.7037873295747725, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.7990268043083656, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.36484904083194636, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5860311918255953, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.36429452903417536, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5861802967934782, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.20871905976580057, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5638397428015866, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.3020264362557517, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.5242629551035288, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.5690431244531307, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.72940147126271, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.5035531794651068, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.6494392775548152, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.5737074989070577, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.686909653107697, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.21153598996567438, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.19139378056089273, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3315891109577817, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.14220602298368126, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.39434634170155597, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.17627564495170006, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2829970218265084, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.1526113403954924, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.15819017856679035, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.40022902080643746, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2407047600997767, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.1519625654965485, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.22790337558585338, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.4825278786131802, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.22171131219306292, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.36739932894360794, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.27896316893887296, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.11404966072004964, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.22630395484390933, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.32904884841000887, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.24044159635524803, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4467052322987113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.4269094075482239, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.6440338986542221, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2089934379295256, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4202179280810887, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2218554028345633, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.47222888132317115, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.20696541555694187, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4196178256392328, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.30677064886592076, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.39600415644017756, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.31568280470164284, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.47331489037280405, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.28948458010419736, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5081976030749167, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2782683483892406, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.43863270603769017, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.025462287071061632, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.2730653180659879, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.23927776234174902, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.42492275577244626, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.4192565023565104, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.47988160867636526, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.2825074232826454, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4576805072760733, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.28387021048806443, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.45495119911742776, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.1445102529736995, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3613780570670241, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.4916952221580248, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5709218171628765, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.3167634335910152, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5181127153993833, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.42682382196874513, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5110919086972547, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3587966530826893, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.44358913447937065, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.5169126503115126, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.2356445175576872, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.33813801354578843, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.4546259941134185, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.24073557586211028, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.42665885117139607, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.37356506757316704, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5184539879286838, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.26105773506180324, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4938667045198544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.2909854083755658, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.48665440760963286, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.11900012437357166, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4188188601039167, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.25411310952997296, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4590241344003793, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.31612195725801134, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.5514235251355991, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.2618674380230805, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.4556417466102314, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.2415484256116827, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.479721997599474, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.130697570216376, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.3179818747026243, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.17080052973495516, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.3276453442491313, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.29977450507928116, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.33448899741633614, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4659921217718883, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.08369850049073722, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.2901642042976994, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.15444910001912215, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.3602646769850153, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.08449327708626451, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.2894243585178823, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.15293885404881336, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.2731842677262201, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.20498415630763028, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.39790541041372224, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.21856390681335636, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.48001348316701725, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.08071364532479955, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.2574794098875714, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.3001327916132617, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4410796277431629, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.010212667284259833, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.08709145169981267, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.2435168172078968, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.15901023657267704, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.08048862002869049, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.1853605878762701, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.14911046740590622, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.15103059042446493, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.1571296183656964, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.12287579518346711, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3025336571585583, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.16684195647378827, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.4434377597535756, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.2581209976452507, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.03686480365068629, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.05917561307723289, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.302500899543063, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.11298984129215384, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.1968483491328458, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.31528213773035774, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.19951297936500814, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.3268993135281582, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.17837945138789355, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.25562737560272863, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.1403229830949613, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.12487405142186059, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.17100054419705868, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.48852222717068033, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.6719348281406861, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.3550594707678603, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.4477620872010772, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.16748366556857588, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.24705598637146278, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.06427240011492986, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.1693981615914476, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.23465398368701498, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.45367638954943146, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.19472361650315084, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.40377628308461305, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.36966653028794927, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.619000557599158, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.38110389492916247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.6234623518191841, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.1919704825961155, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.48258076456494625, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.2773507370781619, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5159249195742764, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.44317342695520384, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.631619947257763, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.5099592281930094, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.6072298627909279, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.4631700687380434, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5030365960736705, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.009897295481178127, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.34260821758540483, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.5279077479961488, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.14978817967886865, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.3579627976703376, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.3428955163829333, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4549331138881434, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.19374128629783371, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.42300747804792893, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.2319904737965691, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4289093241727338, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.32950396894850414, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.15658555631153465, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4282436178753299, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.3080840787435305, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.5213726866484168, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.26970223719007375, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.49889423329457655, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.22688173051219204, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4245560010070955, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.038798017720540624, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.20189070954084243, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.4058343737951635, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.19196608410425278, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.4492698041709823, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.529528271776728, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.1768429537093963, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.39729894405642474, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.35462291728498596, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.5146453792122839, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.2548688930100782, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.17503462388587468, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.44997796125841427, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.6182809391974339, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.3911045875580893, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.5649289943680694, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.3074421255707261, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.5565613982556684, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.17327326423126865, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.25365021154352885, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.44903119868153707, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.1783827232160263, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.420993901006138, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.5471998982127312, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.7261143787285806, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.28098066731217336, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.5947928282659879, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.2875226333184278, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.588006186288413, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.209915242028325, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.458928494199136, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.18822842483525964, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.5517982259751538, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.728463219390527, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.41220068332005494, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.5170083079058972, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.2954601793007412, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.5471466168379375, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.05365191963585759, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.08673245635389941, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.26477349066797173, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.3471867713780383, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.43403373030619585, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.33731180652769377, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.43284789974371823, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.25107542027773755, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.4717531424710489, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.3060434879538489, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.5193433772960362, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.11350052389636105, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.37168631723243695, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.25107542027773755, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.40205887773726906, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.43242099485313257, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.6199378105417234, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.437242133308854, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.5933352820464992, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.6182013682539468, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.7316707888516918, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.010138118025021337, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.2887308472548599, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.49661681529787766, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.12765417324195955, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.10215316399947212, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.11108510046198607, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.095671247915619, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.11762645023115613, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.1192788515122758, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.07197439647670131, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.11667693485554276, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.08933914980645931, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.11553650371823457, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.11422824640116801, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.22669018884314224, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.44041899547143254, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.1701094120440135, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.29286151337006916, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.2927933973558131, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.47375496223846336, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.23576035344764779, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.4410130386349546, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.12806342795779524, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.11867161308842614, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.2993245195993835, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.4935525272820318, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.3174643312173473, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.4184322271006407, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.12737854791303035, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.13241068483589094, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.08640648924627486, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.15521968858995389, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.22688972701544685, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.06874614919231697, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.20986932076566409, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.1441116002025322, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.23046678878134805, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.18415822638260726, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.4239216335535842, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.1648232576186581, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3767452176195591, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.15481642632105738, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.12441731199858395, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3059238664644299, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.30255502678985613, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.4951608503180153, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.22364174996640396, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.4274071156780137, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.39386077108352097, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5087613640815436, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.22727859775180315, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.1311928332391091, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.26532814879858924, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.2916607130801699, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.530090321409246, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.5344974294085829, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6840734337993466, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.24006100607956476, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.525627718929817, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.2138189462062278, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5026986017430701, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.18590033912422976, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5277132126206505, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.27358384246243783, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.4269383120062787, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.4241307927670055, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.6124845435071574, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.26035035646973526, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.46778616416931385, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.2924332807580828, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.5630086380298369, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.10746287309590724, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.14373108296174128, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.3775704790319748, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.3180137236294719, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.485644095022506, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5954238368250169, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.3762634236591465, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.2746536544630471, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5097703431719256, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.19953096796729, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.41765409316042634, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3132259424523443, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.486985188451814, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3931991982536581, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.5785532289502031, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.3015379177292923, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.47219984621571265, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.45115893481205593, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.573184490403203, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.03030681702844562, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0911209149063426, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.287607153713456, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.33384668064796064, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4509736001471859, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.27392978689521524, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4257914116040595, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.17712030567063494, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.3607125350126197, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.20588295924038905, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.3896356890700865, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.20326644685906695, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4500858289192976, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.1701094120440135, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.27393234790621707, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.29187438961211887, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.43018985278558913, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.3163068724060186, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.46966390516937145, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.22900275839396275, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.3639427717471459, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.02133189360351615, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.232738415750697, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.4383057556966281, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.20135763034646928, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.35006080282921004, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.37565846334638286, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.45365001576190844, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.25087428990920285, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4056641749589937, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.19984280537514973, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.3848167376504214, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.20551293694553077, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.39212627739793066, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.3527900076150821, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4631149439498215, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.29217369652859104, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.42254734634105184, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.3824323271187023, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.441028373714331, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.43683636444615426, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4737804899251307, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.15471749140281507, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1511256708696252, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.32068800761395755, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.1529665009156386, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.15471428129658021, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.23308330456083767, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.19252575667755267, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.19291730665464807, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.05506209809653264, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.16401345929505193, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.15744611445105594, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.09762877195286271, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.29743205258174543, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.20630474086364167, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.3136206747907053, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.045871816440760785, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.13296075981357097, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.06757329102794982, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.10260020371628428, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.19591271282120945, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.18545347920261077, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.3552926519343793, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.5830807494515953, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.2440425026082214, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4841963273421365, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.2665173445221397, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4773652521033466, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.19370500984280545, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.46082177091138216, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.1339286445060473, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.40284578235338797, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.6594216880289406, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.4306385337550844, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.571178776405641, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.3972710678215965, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.5660601276423057, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.1271635774096859, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.10975134114072839, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.12514106545496687, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.07142565875275515, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.156820371591221, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.34038446123808824, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.5113782796801761, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.304657077674182, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.13440935424304148, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.07372315572460936, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.15220003737840038, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.2646502686401735, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.3670756593207092, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.25837130980300244, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2908618876976738, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.16178785287683076, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.09811660434035342, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.1246356401043489, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.16340836420369564, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2758741093443254, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.1423117641326302, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.09621288561909111, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.19434864892148712, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.15449597020263703, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.1548405622078228, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.15523404519916673, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.09802862511942351, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.16839811363216226, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.07197439647670131, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.21806094673608334, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.08157532940354172, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.10648920758550386, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.2161011617270055, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.006107696271654331, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.38082873348976415, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5493718467604206, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.36481383830535447, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5676950665117104, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.3021375397356768, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5402514274709564, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.2879557238941732, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.4713401374151406, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.19280597289814436, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.4994355439174767, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.3020028072429882, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5086556742243997, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.4114081637091976, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5087711002829843, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.4333488014632613, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.5965589158023283, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.02862331475958317, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.2639691450290325, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.19951297936500814, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.30595434646385716, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.3491726680217181, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.49174493737704345, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.20326213373677707, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3965155400704921, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.24460949326708067, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.38733092970267924, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.15491814189192113, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.08792400740884215, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.17948702635267894, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.20904996083879818, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.43827728785185827, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.3375804740497263, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.4148119294349798, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3014454929306372, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.11817024510657297, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.15384750052099497, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.2740074272472142, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.14220836651767108, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.19783691883509516, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.35420217205091864, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.14287380394182542, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.23836122054487227, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.1416144676550763, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.19125133772985436, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.28829924061405854, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.2539342198718324, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.44052227552801354, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.12360545410216946, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.27606723250380166, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.09371774386674346, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.25645319708641284, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.024496873668008953, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.13731102930446024, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.23813185482579471, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.14713433937849357, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.23515811334479123, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.41733625901113247, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.2550611509722341, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.12119462149205885, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.253313434440015, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.14716520874694558, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.13736938910605517, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.08898219182045804, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.24022591386861764, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.08044369277849628, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.33820654616065665, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.17525088821946255, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.34470995240873203, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.07526061966136668, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.12790226901170157, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.36821398145189993, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.6015510626637584, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.22008558571360354, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4407629091157834, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4292296939530436, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.153457516394788, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4779432111086399, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.42938082279138273, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.36720232244744416, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.18421227895863315, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4841569295186865, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.17659857710927052, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.43901708743119683, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.16251572062938138, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.504137792563395, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.09294802312471048, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.10039894744886725, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3531607188042858, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1349922772392652, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.1381958549483014, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.26422743815167654, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.11714319535206957, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.11346446511593337, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.15991482183163408, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1305157276666975, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.1059786102229136, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.15586951077565686, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.31800940138537775, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.5024549186437189, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.10290348648040436, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1604076716890132, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.11294558347159712, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1407490279587376, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.11282878483968255, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.16649202141995997, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20972571494011877, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.395894071208527, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18559542135951204, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3804842882867387, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.35369375385786006, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13087682931309413, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19462952976787054, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.013538497707846785, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.16678872216161894, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.38156158663679846, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15471428129658016, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4580211317461481, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.21940429389247643, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4343280866601455, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3300025916068812, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5052501972629104, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1824401863423467, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36709433185688595, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3377854698776805, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.521201229892482, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12475846123062707, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.27823340731817514, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.15122189206102096, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.26750110507308866, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10759927692349745, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21065794536310511, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.07843772989359644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1324578891826276, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.08163977068875294, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14134641571854575, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22948919855739472, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11809057094812304, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.27930342777387007, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1475503033983142, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22104108935973044, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16434349396840395, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28582614857210975, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.24911274612875411, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3603818786794888, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10085167559661873, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.23831215045289575, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17543744527808774, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28201016956553354, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.30327872414714485, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.49804213541579834, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.21685485833927476, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3714219747170047, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.12274092982883021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3385513651938691, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.1463197333291977, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.366137273378509, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.12656494026948834, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3156355830822428, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.13237645860785527, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3818322535970043, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.4251675822745958, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.18154235663145316, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3906877817743504, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.12487405142186064, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.32817291858267583, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.16701570871784516, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.4021286881032558, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.29383139922210444, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.15799783604363904, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3949243937510492, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.12787395553510186, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.33752742535974617, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.112289032173749, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.17726100052085036, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.31017716089889963, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.14276716121505195, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3191375424862687, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.13181313433495553, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.31758120882708796, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.36577446688152704, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.33753843688529356, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.1463197333291977, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3593717322097392, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.010176705289341573, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.13628770358024436, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3124983184732695, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.39962545473912425, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3710595252626966, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.21688283061839067, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.41775824162589076, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.18235247300784824, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.40779523977234755, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.013915288440632284, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.39112369376374106, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.45117912182963626, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2202248274013358, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.48474965676300186, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.45813938111627356, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.1593344703029041, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.22494952618128455, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4760660341798742, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.14326513489612383, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4034278533385552, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.12666372160329223, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.2650373529479294, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3228288840559658, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1649662542496744, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3466546857451185, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.01536966738773372, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.13829446068705525, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.32059338352121075, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.12319017561119809, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.38132934354408266, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.16521691795932783, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4134512022176617, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3435867188688158, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.12366644075037489, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.37651342775995167, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.2962222000049211, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1971903602140518, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.36269646528997446, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.18294117097472648, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4383387744769579, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.2922087191170089, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.18237599479708327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.3740403511567824, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.2244748716483542, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.007281906895508523, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.17092467746295725, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4340281226634826, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.3969902065412634, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.22860414459682069, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.47331131010100724, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.17200673466668953, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.39948318545775324, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.10553225565626573, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.1763116500850642, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.17730543118229922, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4201842844735916, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.20298407172594946, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.427376330935813, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.1740044679403827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.36375152376157177, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.12876689524369925, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3253153379449275, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.13269353024089545, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.28998089836851504, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.01357525601063516, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.382987159925022, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.23159459211256597, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4615951366251923, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.17558199612672082, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.41334979014850587, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.20947801521367798, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.37699245483283905, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.24318848592140954, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.501343318078065, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.13784906211485343, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3161105981607342, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.15626231814206226, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.2918712789926548, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.322788951728102, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.40263021320001785, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.119159749312327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.21297942664093145, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1405026510197826, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.24785258181936404, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.15325316503089068, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.2756316951639811, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.27021732648475527, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.3720094104315791, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.3210853623565359, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.009559007108143848, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.05937666456658802, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.28306950244125495, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.14063630555225284, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.24531520458611372, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3264287329357334, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.41662443172249786, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.21397099133614067, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3568171392601981, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.16925466459550803, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.35912398848424326, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2036348471340078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3472831655579266, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.21547697432588886, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.18039960295364865, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.14134641571854575, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3078571099929154, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3117564667581329, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.15658994837053716, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3084004707364603, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.20215771603666896, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.34483322672745376, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.16165057948216605, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.35172210628524053, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.012458960343878354, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.20053583653512705, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3585550644386862, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2298971389591186, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.45764667682340326, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2327080490816513, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4213315211213489, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.24362353508932386, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.14005830765988142, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.28271314565258726, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.01943377856541192, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.22183437291807073, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4690517750319636, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.38140613622171876, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.15089318423122547, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4354703980715437, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3931382365355541, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5429286385993002, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.007237155276460672, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.22233922818300378, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.46750271079023087, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.2769725060346048, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.2568191876426829, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.28135849152758385, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.13585608692428647, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.24602093467402117, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.014749122939855126, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.22585782564798598, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15795370509162066, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4301348480354066, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.2462954618610128, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.25512324153300714, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.006249447069096045, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.17131793456589922, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.3238973846683935, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.12832055613623328, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.19433944404681203, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.14482189302397735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2913876815877049, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.1217802106941195, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.19314598726036322, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.1361658548186748, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.3295167855876769, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.36936951339109975, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.12819825042984195, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.36515328991507745, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.21743769222637532, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4131100936190792, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.11914562165195522, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.36399317085467314, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.1302352098354987, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.326035134708999, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.37693028676849333, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.14410670132605607, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.24025207593480963, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.16306957103469613, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.14219389639501667, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3550945020345845, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.013501937941345124, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3791177761741048, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3486134995973391, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3284649068899757, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.20988785322505515, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4323069807401831, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3397876134677058, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.009628007582726738, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.20247469739337648, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4418847146430419, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.24942094354139677, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.8827916928185874, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.9278293769424701, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5919743410620021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.8142101616656354, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.31085126015841524, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6363359373482358, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.6642718379939968, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7768492311706325, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.7660237942267061, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.8523393041110139, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.21688283061839067, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4479129164961325, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.27434065146872866, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4551761513917315, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.28112283847231073, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.2624404628276128, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.2044894275649509, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.1423256407233325, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3890910518336767, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.28253893006668057, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5344527156938984, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.24797984721910182, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4497423075151473, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.31443515194397026, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.47095169791332614, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.15310672854444382, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3863552232164501, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5712128723523814, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.2112174444529806, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.37115876147810895, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.11460384138378832, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.32308861733051, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1308613527030366, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3063146286877558, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.15082713742973322, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.32932326410706136, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1327211341271203, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3314509193319989, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.15844990886367694, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3420386248472483, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.2692774258928226, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.41794128483626714, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1357521816192783, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.2572585481186862, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.12724240656680139, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.2057386078262138, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.2045516326940124, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3580560102192693, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.11682130307923512, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.12966020590511304, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.27356002971987425, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.12030921204016166, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2935108999290831, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.22974300992320248, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.35766721538849355, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.12787395553510186, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.21931515993565381, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.12066241764747698, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.27545568870085096, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.019283248858266676, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2136628330685448, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.28253893006668057, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4976525036856883, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2130931749764531, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.34386259606696806, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.13181313433495553, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2789141599069488, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2912425895319303, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.44041590401859537, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0994331256564067, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3083809409945523, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.20416354003164872, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.302007087079803, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.20236126962624626, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3258121781111335, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1812045836887171, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.18649703687001343, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.1441966459257424, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.10163106686838855, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.1449839903475139, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.011839425862120785, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.12217624912667482, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.22607924847614314, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.24641985957917703, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3656201332966237, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.14455493909089934, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.26045772569635717, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.060104699568344466, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.13351234315884475, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.21097478779973527, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.1222932912515144, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.27908865215418427, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3015846610603115, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.23870544239673078, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.30389718661514126, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.14288815197601673, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.14957316612525498, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.15892175003851755, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.12238991307064728, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.12217624912667482, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.2594914718095331, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.13237645860785527, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.28860674393595576, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.2741063450190292, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3186528530268389, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.23870544239673078, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.304002814359566, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.11955848575576285, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.24214093597439865, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.30667375521853, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.17081061355061614, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3645499017230567, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.26958290276046354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3771988116643981, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.27675048474641756, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3780460244391623, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.18915983539487516, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3233732862049797, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.17730543118229922, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3932130355670893, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2792720350291165, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4727106572557922, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.30611912992377904, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5089658823760935, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2350059388724058, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.47180561431754137, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.25666145410765273, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.42824842089739035, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.01218568017760398, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.28487594977117575, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4793282150965233, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3615889761528277, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.2758862937563794, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4673996585329364, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.12503614625842938, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20624064341134082, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.14539971733340926, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3142480613342232, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.013501937941345124, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.1307524497557363, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3361579714658665, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.26784884804296605, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4730778159019489, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.2577701686990218, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.44408712841584985, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.30676022061786057, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4541682078728273, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.20889290280122064, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4127473131540302, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20666579426708878, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.2472264820166318, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.44131209510775493, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.20588815727980112, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.41944461991174653, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.28234422994155567, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.40408604199549997, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3368893372278425, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.22475293380632405, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4113282938664548, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.02586907629320447, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.17504630199215807, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.42263957382757394, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.2970253066411792, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4937281552804402, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.29668873351523645, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.45439697849389765, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.22475293380632405, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4113282938664548, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3300025916068812, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.48848603918467354, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.07378730454743347, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.2147829756231977, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.461889639754688, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.13588969750586194, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3523239132597748, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1781853859048144, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.36865727091511874, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.2961559727627133, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1087256678530004, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.2806568392288235, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.1117144649534104, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.11595071162902998, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.23613460338414927, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3018966700214589, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.19000969221027156, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.47485346348682694, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1826249361348376, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3484922054626185, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.14214337448390021, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3609159299635901, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.12580941330293896, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.25539472441248895, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.32512365186675757, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.49575683948811416, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2775686235755007, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.12416350645592025, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.23265120010755289, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.12846497020051437, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2670865602673704, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.12837839907779722, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.27407435258732404, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.01943377856541192, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.28157636825815224, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2957851779920877, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.1380829650365223, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3721998912104682, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.04086511004567092, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2602720291074952, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2181762689207584, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.16091123830242154, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.29683125676353944, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.217554942150074, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4362387654025806, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.18772266185346026, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4516796575038181, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.28252374116432993, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3549531183419122, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11203754340102182, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.24500939878540784, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.028862054978314452, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.1457751611852363, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3555305489093387, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.3388147925328834, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5613545455926224, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2567411255293559, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.43316022307538615, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11084119214562044, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.31189626075375737, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.26845008380756696, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4949662669458603, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.006356553689956574, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.14219389639501667, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.42294374285621605, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.12876689524369925, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3034375834959013, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.10531636385748798, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.15775047351971955, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26128489301072644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.13693974024600017, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.24848210874138496, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.1967909040251079, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1331440297382392, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.260380807406192, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.13181313433495553, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3015940724773674, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.19814442729413892, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3102637353553794, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.1547905499593561, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1797040059786851, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.31684376069016223, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.18285404868730815, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.30239028036773985, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1789898550500511, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.3097165910502381, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2126837065505244, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.11378142777276677, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.22340791296245502, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.15292856632736312, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.13410639648320277, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2766307218364423, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.28215396128745796, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.42707262506779775, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.34661236387446376, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.460219316901126, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.17615667556197442, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2360342341767823, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1665583359843711, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.3065906311539413, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.15533586874332386, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1629119279942046, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.28005921777118686, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.23292164090728384, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.4807364086898486, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.15742302643532463, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.3220278551038813, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.18889796346849766, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.390828018955539, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.1484131243041233, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.29204141824583923, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.019510108479333106, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.12690518984438146, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.34504023491572783, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.32158597295125274, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.47599808838263624, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.16605519952887438, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.4509175930579823, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.1453056698669881, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.39967038005662203, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.2533911497972673, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "bleu", + "score": 0.3713354961020269, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "translation", + "metric": "chrf", + "score": 0.5550866336796069, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.15404632289830114, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.3934040018417114, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.14855426866172083, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4089031318363594, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.1736086198203101, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.3517982963278223, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.34932113360659606, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.19118896363692645, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.3809795532418233, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.1463197333291977, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.3917791418162285, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.39734352171767023, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.5945486848869352, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.2577701686990218, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.4782520457453995, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.20868721961570674, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.428749486637124, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.12718016030558363, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "bleu", + "score": 0.17600429416656618, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "translation", + "metric": "chrf", + "score": 0.3699002945708035, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.15606652450871636, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.38676973597326414, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.346045680932875, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4621083930255766, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.25751650996406256, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4024993164207148, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.2235704325446919, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.15022502107020383, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.35815688949507335, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.404747550284944, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.2569630819965695, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4638113964856152, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.3110277298634108, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4544814134892622, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.1514798524753532, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "bleu", + "score": 0.21085288029061555, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "translation", + "metric": "chrf", + "score": 0.4495966880080192, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.13628770358024436, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.30464291275706445, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.2873180113751827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3691291664744644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.21636840076404606, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.19001225225138996, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.15902128868434096, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.056621705833762526, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.20663551397330182, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.13423393480752616, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3299575528823155, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.1342816454725345, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.2947742083176782, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.19968127388777596, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.3882998250788871, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.09083570416152802, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "bleu", + "score": 0.08340582868969061, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "translation", + "metric": "chrf", + "score": 0.24375361043612415, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.29331713922012836, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.18582826054135923, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.27931164611946097, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.13628770358024436, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.196045070105177, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.24882723725145164, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.014111660160258993, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.11825188614600338, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.309191449874187, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.2884821307335442, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.33382277849238146, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.2689798160106529, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.05338400788913371, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "bleu", + "score": 0.15069228960836628, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "translation", + "metric": "chrf", + "score": 0.3780369016923967, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.22256474447332572, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.1087256678530004, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.1812150267056357, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.09624090077172921, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.08196612912062277, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.08546743910655354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.009628007582726738, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.10539140971370214, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.13307637507950731, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.248301283911442, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.1612982609267219, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.026144042496829667, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.15080392997556943, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.020035925770441693, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "translation", + "metric": "chrf", + "score": 0.14949577610607986, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.27274442393032494, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.4767475272675149, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.08206174754800233, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.24600065227133203, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.2894206730562163, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.130697570216376, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.3655209094420809, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.28364158842913945, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.10704604894593339, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.28394660030720387, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.27434065146872866, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.5335196659354084, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.14868720326332424, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.31851758158862814, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.16574624158406068, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.39903127217688206, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.17441676789914212, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "bleu", + "score": 0.11047111196276078, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "translation", + "metric": "chrf", + "score": 0.34052178710726805, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.32404902054836443, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.13600287923663476, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.296137260965594, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.29939861106519894, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.14523626605098836, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.30140546115451755, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.025375434133933374, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.14023497693876652, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.36380485738859425, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.13821968766350226, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.38195203502692965, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.14062598436731893, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.30895718290882995, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.3379310421768954, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.18983865085357438, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "bleu", + "score": 0.11846592694382017, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "translation", + "metric": "chrf", + "score": 0.3327568989410052, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.2711712970899214, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.12748547320686965, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.2256255768392581, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.13897785762455162, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.23836155147452145, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.013649374730290785, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.22198095012235933, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.1434542641988108, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.36240616778818124, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.12276168155242136, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.33010159216538154, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.33188650052532137, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.15362952183523224, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "translation", + "metric": "chrf", + "score": 0.30296187648016454, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.2418791601714353, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.3327258966414523, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.18472476303374016, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.3755456818093384, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.16879768238484785, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.25555397978788114, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.09786652404503388, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.049912483769917554, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.25692379453535436, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4546530774152114, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.13836903384315105, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.3041148601474962, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.4258132414030401, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.015512728671064098, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "translation", + "metric": "chrf", + "score": 0.15128322251777712, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.10028126671358768, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.21465229625864304, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.12004125280185217, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.13075268692454742, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0953744984725567, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.12169529780486223, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.012305643362227912, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.03909826185078624, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.14419884809836403, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.30891047545292866, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.1736086198203101, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.26516347846751803, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.050527960640167395, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.15324458304516878, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.07507324760135455, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "translation", + "metric": "chrf", + "score": 0.10491879465581472, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.16340836420369564, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.36283746947775286, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.2012788513843773, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3241416301984527, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.222140444588514, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.29953031595565194, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.17864866390812006, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3351983303537736, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.00778050477663876, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.10228157247360439, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.28364356692412224, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.17781916046116683, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.4033164239257758, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.26660495672254986, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.48379919869596677, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.19694774164152168, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.38832716646104737, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.0942697851358349, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.2909029546052453, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.08096470168539781, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.1842927346134308, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "bleu", + "score": 0.16419136872156925, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "translation", + "metric": "chrf", + "score": 0.3585424355733966, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.17092467746295725, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.36754216836174997, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.2290624375320133, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3703637148185826, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.20316405901471601, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.19469497781111866, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.015417060337592398, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.1686298660722932, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3297355225093461, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3712864745826113, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.15022502107020383, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.25146961869209467, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.1537414828207279, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.26860510051078923, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.08158808350367645, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.32384784747508405, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.13438821320662678, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "bleu", + "score": 0.18398732411042557, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "translation", + "metric": "chrf", + "score": 0.3365595470257617, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.2048509043540121, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.30660769477099914, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.13583266175611605, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.3216755467134329, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.15351923691242442, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.17346101917834872, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.014779059188632846, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.2027129184521292, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.19506658275416644, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.37390084654467903, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.2570154296975664, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.30037434123502954, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.14019283646199668, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "translation", + "metric": "chrf", + "score": 0.29482817900208275, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2625069486168988, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.18398732411042557, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2401204673210609, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2557074827472021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.11342612980533952, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.20642155523427064, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.1844324702587695, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.19003666214886608, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.2972636640814622, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.25656344973855477, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.052170870839955336, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.20873122779190237, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.11588655450753808, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "translation", + "metric": "chrf", + "score": 0.31184111541609666, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.14645066834461026, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.3063157759715837, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.13937542038981274, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.2484339727687842, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.2737880982480958, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.16027177058640993, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.32787056009967885, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.014191769632389028, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.2990163335159231, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.31381603830277516, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.3537931405693036, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.24806225111003116, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.10334447217760966, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "translation", + "metric": "chrf", + "score": 0.27914163518014634, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4018398428695008, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.13382613080002836, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.2537208483050712, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.2630950732259493, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.31644878211504707, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.01357525601063516, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.32899482453344325, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.37474288032090364, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.19471699714049806, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.4243692249576114, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3966186698883196, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3773883829664062, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.16426747109440132, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "translation", + "metric": "chrf", + "score": 0.3305176755869093, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.19834633509680927, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.3614699644212251, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.19000532642952978, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.2405617210713385, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.13181313433495553, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.22684236479857312, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.12605968092174913, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.2981369001931117, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.01943377856541192, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.1250305362182298, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.32984658078581847, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.22065986906895835, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4278272026300076, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.3740469325434541, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.12303973923740176, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.27974292659168287, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.22900275839396275, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.38971013705057145, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.2568248089527478, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "bleu", + "score": 0.23596413141640699, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "translation", + "metric": "chrf", + "score": 0.4460483451210987, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.21010332378415866, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.16049139739945859, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.019878741152560272, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.06349386212511017, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.07947942766247484, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.051273457709972596, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.23111943721577524, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.07507324760135455, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.12104111874038677, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.00966183574879227, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "translation", + "metric": "chrf", + "score": 0.15278097980557423, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.22559518514805962, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.10902248103931993, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.16583300501883477, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.09500430486926614, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.16275460726319185, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.00900384448670719, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.10967147003271659, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.22965648229220062, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.39443611468201295, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.2575778955813508, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.025268949786087753, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.09146177671563739, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.06902846920071223, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "translation", + "metric": "chrf", + "score": 0.28319385484238924, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.2154897801937284, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.4083528236853434, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.1740044679403827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3783149893116463, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.1258273118584677, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3207394929288255, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.12217624912667482, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.31969612320402657, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.17935682644456008, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3091351691666631, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.14391777351450838, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.44334129048496157, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3628458215058983, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.10090835404165316, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.29517668740514497, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.20194534725070323, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3932742385329565, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.16136227410457432, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "bleu", + "score": 0.09694361543655163, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "translation", + "metric": "chrf", + "score": 0.3440711775946441, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.16574121720327287, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3831242265731415, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.25011851152889697, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.37126196571844006, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.05671337518059672, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.26251266297831083, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.16778622160516982, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3951324994252282, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.12032473082025806, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.257107616327564, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.14988552732741944, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.38777884437248195, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.41537893836225864, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.2791853718580844, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.4442614192163653, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.16553354669449483, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.3691421692417742, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.13088135388440164, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "bleu", + "score": 0.16165057948216605, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "translation", + "metric": "chrf", + "score": 0.40481686014626656, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.10085167559661873, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.18125609699008438, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.07021707359312077, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.22277615768791725, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.08568635726825895, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.15015893458515112, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.08071364532479955, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.16483481050649815, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.01399317758408034, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.12032473082025806, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.2284570301856708, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.3056690039624212, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.24959865593445524, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.009321299833410845, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.03544218674244847, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.1649228640173518, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.008521982065882127, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "bleu", + "score": 0.08428682606415534, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "translation", + "metric": "chrf", + "score": 0.18678471555453846, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2511187197601112, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.47819642315416905, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.3506669990311196, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.5110276476843241, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2823806497463373, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4097065111577764, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.36354348777947165, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.022303919896869945, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.16027177058640993, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.38029889329714306, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.2821717364449649, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.46605842048009466, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.23564633388537584, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.447428006095353, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.1947354557370754, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.3243594420480133, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4771341471235553, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.2871055620293988, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "bleu", + "score": 0.28253893006668057, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "translation", + "metric": "chrf", + "score": 0.4809444160915631, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.1537414828207279, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3722050787080825, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.20079789489773447, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.36306781014142475, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.18254530689454584, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.1217802106941195, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.18747753884336388, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.15646461125164918, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.1004877071264788, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.27596073639342855, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3355401995154488, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.13628770358024436, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3424046100869749, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.10146459445560989, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.22506824404168607, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.11873119582007514, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.3049590220162351, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.013010079472105241, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "bleu", + "score": 0.08876856613382532, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "translation", + "metric": "chrf", + "score": 0.22686382638352, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.2572390116372129, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.13966768009198655, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.2813477376997818, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.20326479836901273, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.025565199775551904, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.20649200037444357, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.14868720326332424, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.3990908050465795, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.25340620432916805, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.24200501869186217, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.012909307569742633, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "bleu", + "score": 0.14410670132605607, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "translation", + "metric": "chrf", + "score": 0.3440349878645951, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.15014755317658912, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.37836116314741347, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.06225053846006199, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.19628052952282465, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.2461113033172792, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.23440058276054598, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.2954875428732851, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.12577829595095136, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.2424544420544208, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.28515517056035755, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.18696425994936555, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.46256405623661556, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.26289516212337455, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4739347975118012, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.30103736170459866, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.4638998374230593, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.0052216413024891595, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "bleu", + "score": 0.20519422155676922, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "translation", + "metric": "chrf", + "score": 0.37142687967287463, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.13600898159884844, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.3230757564905893, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.2235267829375094, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.4197003930616035, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.12017396628208415, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.17861403940933454, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.12307180394105495, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.17008525013313108, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.019510108479333106, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.12017396628208415, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.18853872865175844, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.1477411900250408, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.47448172512052467, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.33941255263446807, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.11458346677361843, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.13803309048601614, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.27926501329820147, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.048472513540084076, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "translation", + "metric": "chrf", + "score": 0.15895910055429568, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.13181313433495553, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.28588995835566733, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.1509901905071242, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.42614589006990955, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.16422774333078005, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.07387254485071908, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.18666450359774303, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.09706088585617588, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.1616203397255244, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.15894780290856192, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.4341048241384398, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.12107046798719928, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.26610410497187936, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.2187290170234865, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.12149201826977803, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "translation", + "metric": "chrf", + "score": 0.21149452047942327, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.19928950404960785, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3515916946368607, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.23693055763743093, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.47110207134358734, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.12322620396842734, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.2816556920367689, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.31252036326890786, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.11761715910158331, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.255091096526975, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.1303852679364709, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.3864868842126462, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.34706134175419817, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.26059169395816123, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.356639946525953, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.14615116208406398, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.2812535721592, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.16500884451724743, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "bleu", + "score": 0.15864869257759262, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "translation", + "metric": "chrf", + "score": 0.36522352464212327, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1250305362182298, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.35192066105839037, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.13181313433495553, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.23734991438269704, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.05708041498286245, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.33319628333145845, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.012680136469239416, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.14326513489612383, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.3686286225188453, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.34008001988346953, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1478206449158964, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.3434941005489122, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1258273118584677, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.3288942334010695, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.31233654238789915, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.2383335612549986, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "bleu", + "score": 0.1459371948060594, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "translation", + "metric": "chrf", + "score": 0.3585936421373033, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.39208241867588406, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.12671660613804978, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3813787743264216, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.16338968219757316, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.43516585142042474, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.16359043508686386, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3659400820420475, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.14614460466911597, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3030057671331465, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.05027439173128933, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.2354400786909369, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.463705123915972, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.2475303873008388, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.4668999300883758, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.2196760025169154, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.43549997500530213, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.12268576462201722, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "bleu", + "score": 0.21921690700367402, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "translation", + "metric": "chrf", + "score": 0.3655682504876654, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.3141940399992296, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.2175732217739929, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.26934434229495274, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.10434360980785336, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.1965525334564022, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.10163106686838855, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.19592782150484342, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.14463936736378039, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.06695900686562914, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.17379715979609378, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.11318741602028208, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.3385268327723179, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.2521939110082033, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.22946638870010946, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.10322985794794913, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.23137708386333908, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.13877364471405382, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "translation", + "metric": "chrf", + "score": 0.16536256958499396, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.19331968002780792, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.30653379537681946, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.16419136872156925, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.38578825514877557, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.11319316697505612, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3044933526278424, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.2055907593919444, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.11772040354469114, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.26408792460406616, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.1334223706673101, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.32135454277697334, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.21322825633117104, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.46552307123187675, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.26036802768146033, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.40212197517878956, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.22177657695506436, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.362986513681601, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.21076307738524264, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "bleu", + "score": 0.14207405313947058, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "translation", + "metric": "chrf", + "score": 0.3375092428241583, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.10372851412328025, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.05428552151774627, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.08170272572786982, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.054560358307203495, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.08394808675649712, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.013724307289441972, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.031040886211736902, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.031040886211736902, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.030944828051431932, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.020277511772040634, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.07555954862563066, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "translation", + "metric": "chrf", + "score": 0.08035467692935112, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.2197940423573754, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.3252925042873819, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.2386512909161297, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.20056920628012903, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.019208954982955537, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.14965254226465277, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.26352230043195884, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.4800402838260972, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.2744862572324789, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.11146907857325296, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.30553778277241345, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.011981278425892186, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "translation", + "metric": "chrf", + "score": 0.11890083473431896, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.16894127989367852, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.3419460050290285, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.3808157877385458, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.4468825428794822, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0810371533925042, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.14661692946967528, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.08644370615638991, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.1799031576653526, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.01399317758408034, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.16961162496305443, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.2560718753815588, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.36565527196849945, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.5772340526881832, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.42571201827765304, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.4882229662112406, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.11350052389636105, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.20482099945558166, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.3757030069486771, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.44130967365135815, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.15641377436989223, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "bleu", + "score": 0.2926394666583954, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "translation", + "metric": "chrf", + "score": 0.36657660679561177, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.14062598436731893, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.3764695982007195, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.26772209592710927, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.16587560311800356, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.36304298665164114, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.27837115847479993, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.015137007844878625, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.130697570216376, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.4014069285003254, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.36149919525841795, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.1584846494016487, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.36323507681554296, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.17853738103790043, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.3932611225012359, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.21824882776994264, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.46610251429617194, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.14482189302397735, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.29373162379324574, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "translation", + "metric": "chrf", + "score": 0.30486946045076013, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.19207278754983098, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.18980024752692398, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.17719555918098742, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.125919848913809, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.12820373892360745, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.12835230243937998, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.17253693310798596, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.013724307289441972, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.125919848913809, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.193779543005732, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.18123509091399645, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.13447737207790966, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.2292142526494232, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.11653181164997596, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.11943156166987552, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "bleu", + "score": 0.18043239916836057, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "translation", + "metric": "chrf", + "score": 0.28954208185359936, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.1968536715007284, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.333542500417417, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.17016486621490087, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.3423671585123332, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.20172859170422008, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.1422809818637565, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.013649374730290785, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.13033894166590243, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.32686841499906305, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.2572381952329596, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.40593200139482377, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.1511794025087067, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.3320313756459434, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.011615369706513964, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.08244068023641246, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.1459371948060594, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.3420465554679724, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.01361221447158419, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "translation", + "metric": "chrf", + "score": 0.28697920902986146, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.14976409594561182, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.35357199599186406, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.1665583359843711, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.2627459510960287, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.26627996704195217, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.13836903384315108, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.24978646356047463, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.01274477939514699, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.14023497693876652, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.33813739363247586, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.25006681812704, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4571706387484243, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.16829158981319015, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.43343086093146, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.18889796346849766, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4769236082569465, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.17382262531034068, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "bleu", + "score": 0.30611912992377904, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "translation", + "metric": "chrf", + "score": 0.4602147817127031, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.20740300708624634, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.11206360906932318, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.11591282390598331, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.10734755849153174, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.1547149164508717, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.014396080136217076, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.08956661266587752, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.13908487697830615, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.22268940623874697, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.18086432478827452, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.11823053204772466, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.2436674848852202, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.05991545180730296, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "translation", + "metric": "chrf", + "score": 0.11204525682954576, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.19393790238357375, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.11424261736422782, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.2214641710932888, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.2060727232464618, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.015322564973157411, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.16060318251525468, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.4205735776331965, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.36353587005992366, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.1465805099425248, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.2504062375822152, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.08423268420860885, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "translation", + "metric": "chrf", + "score": 0.16575157845273134, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.2354734090463839, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.15803708011407422, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.1534362334139164, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.10180772252352548, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.1504685575799601, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.019510108479333106, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.08419649365121126, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.13894487895537852, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.16029555355562974, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.20725689371393963, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.08700223397019134, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.13038203143994997, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.08565389722143929, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.029825717020904915, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "translation", + "metric": "chrf", + "score": 0.13917503995313457, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.029484944886992947, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.08447773742536654, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.13534893625087907, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.09370499064568831, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.09290214674953907, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.13365574149528514, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.08208989406093385, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.14930624110293342, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.3164816989291774, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.14389998233563306, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.019789621633805, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.015694709781828922, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.0013061650992685476, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "translation", + "metric": "chrf", + "score": 0.03578332662951316, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.41557169141417455, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.2152971203854131, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.45844266102619613, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3510795373941694, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.12008699630291321, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3057995109706267, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.014563813791017045, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.39078916716317236, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.4424078695089543, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.37940478449513215, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.37844523324950047, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.22192938454540428, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.4722416995824046, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.2997971304127524, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "translation", + "metric": "chrf", + "score": 0.3490555531851794, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.2603954279622387, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.30277029197532107, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.4119930658213665, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.3172363525732528, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.11118250314256345, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.15998889622726925, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.011952636623752582, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.18643810742149283, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.27451118944416963, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.49409921315387106, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.32339783931086485, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.13682541096468273, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.03859789398073438, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.2579829463429384, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.09893019435891957, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "bleu", + "score": 0.09812163258584553, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "translation", + "metric": "chrf", + "score": 0.21604420923574277, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.09824473131578967, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.26161454205511375, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.17813207960290023, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.12613950642084928, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.151940344351269, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.01227543166891452, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.10007559720315146, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.17039219737754951, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.23064197108367845, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.3431872475401807, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.12288275235434755, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.024803681108889294, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.18600740402846921, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.011107583005137327, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "translation", + "metric": "chrf", + "score": 0.08459474168598785, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.1077448900968642, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.1466632434186726, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.19398573687939527, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.11116961409150189, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.14407603400456293, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.09701470884546518, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.13602315844950702, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.012097231620361405, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.09567839473656903, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.1420055095780977, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.1691891767891315, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.3972953748769731, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.12593609141437836, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.2608331698897448, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.11116961409150189, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.1400202211268643, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.1178826285649154, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.27891920549891147, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.017630650669775427, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "translation", + "metric": "chrf", + "score": 0.08279488257497868, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.1535686541317235, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3273820311085689, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.1479757676110522, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.36882257747840863, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.26565870470756586, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3292881368564412, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.014735502561072271, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.35840752330309295, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.14270596284245182, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.39124824093913935, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.17335685887215152, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.4144645977372426, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.16446075661451018, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.3554659823158598, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.12795100096585615, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.40513606402908053, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.011700892988098854, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "translation", + "metric": "chrf", + "score": 0.39515912915016366, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.2393868174483411, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.16951713127949472, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.13967106347277614, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.11873119582007514, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.23521660134811131, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.003917516359736889, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.14618317074862378, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.203586302029077, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.08382475612465994, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.21886462133645654, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.1227835638933406, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.09628098247644358, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.04911913163341779, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "translation", + "metric": "chrf", + "score": 0.13078905745700525, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "qwen/qwq-32b", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "qwen/qwq-32b", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "amazon/nova-micro-v1", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "qwen/qwq-32b", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "qwen/qwq-32b", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "qwen/qwq-32b", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "qwen/qwq-32b", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + { + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "qwen/qwq-32b", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "qwen/qwq-32b", "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 6 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, @@ -35054,7 +215814,7 @@ }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, @@ -35062,55 +215822,47 @@ }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, @@ -35118,15 +215870,15 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, @@ -35134,47 +215886,47 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, @@ -35182,31 +215934,31 @@ }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, @@ -35214,7 +215966,7 @@ }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 1, @@ -35222,31 +215974,23 @@ }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, @@ -35254,6499 +215998,6243 @@ }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 1, + "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 8 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 9 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 10 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 10 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "nl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "uk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 11 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 11 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 11 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ig", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 11 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 11 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 11 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 12 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 12 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 12 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 12 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 12 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 12 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 12 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 12 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 12 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 12 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "mg", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 12 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 12 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 12 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 12 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 12 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "mai", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 13 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "as", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "qwen/qwq-32b", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "amazon/nova-micro-v1", + "bcp_47": "ny", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 13 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 13 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "google/gemma-3-27b-it", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "qwen/qwq-32b", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 13 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 13 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "amazon/nova-micro-v1", + "bcp_47": "so", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 13 + "score": 1, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 14 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 14 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "mag", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 14 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "si", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 14 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "si", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "km", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 14 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 15 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 15 + "score": 1, + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 15 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 15 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "zu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 15 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 15 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 16 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "cs", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 16 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 16 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 16 + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 16 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "sv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 16 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 16 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "hu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 16 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 16 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 16 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "el", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 16 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 16 + "score": 0, + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 17 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "sn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 17 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 17 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 17 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 17 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 17 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 17 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "qwen/qwq-32b", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "amazon/nova-micro-v1", + "bcp_47": "rw", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 17 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 17 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "google/gemma-3-27b-it", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "qwen/qwq-32b", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 17 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 17 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 17 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 17 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 18 + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "aeb", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 18 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 18 + "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 18 + "sentence_nr": 7 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 18 + "sentence_nr": 7 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 18 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 18 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 18 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "xh", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 18 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 18 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 18 + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 18 + "score": 0, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 18 + "sentence_nr": 7 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 19 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 19 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 19 + "score": 0, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 19 + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "be", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 19 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 19 + "score": 0, + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "be", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 19 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 19 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 19 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 19 + "score": 0, + "sentence_nr": 7 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 19 + "score": 0, + "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 7 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 19 + "score": 0, + "sentence_nr": 7 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 19 + "sentence_nr": 7 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 19 + "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "lua", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 7 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 19 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "en", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 19 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 19 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "hi", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 20 + "score": 0, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "es", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 20 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 20 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 20 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "qwen/qwq-32b", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 21 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "qwen/qwq-32b", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 21 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 21 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 21 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 22 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "id", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 22 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 22 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "de", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 22 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "te", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 23 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -41754,14487 +222242,13519 @@ "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 23 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 23 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 24 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 24 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemini-2.0-flash-001", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 24 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 24 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "it", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 25 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 25 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 26 + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "amazon/nova-micro-v1", + "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "openai/gpt-4o-mini", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "google/gemma-3-27b-it", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "qwen/qwq-32b", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "amazon/nova-micro-v1", + "bcp_47": "th", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "qwen/qwq-32b", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 26 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 26 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 27 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 27 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "amazon/nova-micro-v1", + "bcp_47": "or", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "openai/gpt-4o-mini", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "google/gemma-3-27b-it", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "qwen/qwq-32b", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "qwen/qwq-32b", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 27 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 27 + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "amazon/nova-micro-v1", + "bcp_47": "ms", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "openai/gpt-4o-mini", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "google/gemma-3-27b-it", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "qwen/qwq-32b", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "amazon/nova-micro-v1", + "bcp_47": "my", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "openai/gpt-4o-mini", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "google/gemma-3-27b-it", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "qwen/qwq-32b", + "bcp_47": "am", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 28 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "amazon/nova-micro-v1", + "bcp_47": "am", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "openai/gpt-4o-mini", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 28 + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 29 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 29 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", + "model": "google/gemma-3-27b-it", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", + "model": "qwen/qwq-32b", + "bcp_47": "om", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", + "model": "amazon/nova-micro-v1", + "bcp_47": "om", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", + "model": "qwen/qwq-32b", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 29 + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 29 + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", + "bcp_47": "bho", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "model": "openai/gpt-4o-mini", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 29 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", + "model": "google/gemma-3-27b-it", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", + "model": "qwen/qwq-32b", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", + "model": "openai/gpt-4o-mini", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 29 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 29 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 29 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", + "model": "google/gemma-3-27b-it", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", + "model": "qwen/qwq-32b", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 29 + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", + "model": "amazon/nova-micro-v1", + "bcp_47": "az", "task": "classification", "metric": "accuracy", "score": 1, - "sentence_nr": 29 + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", + "model": "openai/gpt-4o-mini", + "bcp_47": "su", "task": "classification", "metric": "accuracy", - "score": 1, - "sentence_nr": 29 + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", "task": "classification", "metric": "accuracy", "score": 0, - "sentence_nr": 29 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9411583614202783, - "sentence_nr": 0 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9389202454786235, - "sentence_nr": 0 + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8775848642818888, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8618703443763697, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7861888156926622, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7987489460131649, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9319748402595084, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7613425680699503, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9020031517329425, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 0 + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.883570112979728, - "sentence_nr": 0 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8263460336753243, - "sentence_nr": 0 + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8060322164809728, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8980680846396624, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9491059403137463, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9664300701360793, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9457224261353452, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9708225134054753, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9419324607589119, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9619002332717353, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9189927159116271, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.895905738615658, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8719916488298841, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9397108105925289, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.884345665982421, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9584454525436005, - "sentence_nr": 1 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9643081480127652, - "sentence_nr": 1 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9067144042813564, - "sentence_nr": 1 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8781616442886918, - "sentence_nr": 1 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9745733081082687, - "sentence_nr": 1 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9237743711831492, - "sentence_nr": 1 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9659571253320222, - "sentence_nr": 1 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9044755244774213, - "sentence_nr": 1 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9016506657203592, - "sentence_nr": 1 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9259203238585231, - "sentence_nr": 1 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9226314544302758, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6237003645369218, - "sentence_nr": 1 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.919365977563579, - "sentence_nr": 1 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9113270242697518, - "sentence_nr": 1 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.898943894327586, - "sentence_nr": 1 + "model": "amazon/nova-micro-v1", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9736119227904283, - "sentence_nr": 1 + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9415432301630186, - "sentence_nr": 1 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.973004167300919, - "sentence_nr": 1 + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9617726716367615, - "sentence_nr": 1 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8788632576179716, - "sentence_nr": 1 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9442690941930104, - "sentence_nr": 1 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9167527970009353, - "sentence_nr": 1 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9264966822048945, - "sentence_nr": 1 + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9760432643638268, - "sentence_nr": 1 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9290639912797567, - "sentence_nr": 1 + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9451284616565533, - "sentence_nr": 1 + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9571970948049097, - "sentence_nr": 1 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9428452278208271, - "sentence_nr": 1 + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.924510998540744, - "sentence_nr": 1 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9354255661287414, - "sentence_nr": 1 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9038448099971822, - "sentence_nr": 1 + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9290214610132344, - "sentence_nr": 1 + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359307328554756, - "sentence_nr": 1 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9462257677914746, - "sentence_nr": 1 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9685511109758306, - "sentence_nr": 1 + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9466350739636148, - "sentence_nr": 1 + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7785501063601203, - "sentence_nr": 2 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8677672451180615, - "sentence_nr": 2 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9519685270619841, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5524309559543085, - "sentence_nr": 2 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8979970994003059, - "sentence_nr": 2 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8979970994003059, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9598023304313453, - "sentence_nr": 2 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8678877090803476, - "sentence_nr": 2 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.3628854370408249, - "sentence_nr": 2 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8866932684030095, - "sentence_nr": 2 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7932574787392968, - "sentence_nr": 2 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8840632918991035, - "sentence_nr": 2 + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9244224424282228, - "sentence_nr": 2 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7493760739956499, - "sentence_nr": 2 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9434070582654602, - "sentence_nr": 2 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8626111481890223, - "sentence_nr": 2 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9742381587466754, - "sentence_nr": 2 + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9614829239512629, - "sentence_nr": 2 + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9634058264556766, - "sentence_nr": 2 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.846746937646691, - "sentence_nr": 2 + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9416090102549223, - "sentence_nr": 2 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9586487245465463, - "sentence_nr": 2 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8628736669093499, - "sentence_nr": 2 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8883148663773122, - "sentence_nr": 2 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.921000444185013, - "sentence_nr": 2 + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 + "model": "openai/gpt-4o-mini", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.964284245003951, - "sentence_nr": 3 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899852954654377, - "sentence_nr": 3 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5884852453065169, - "sentence_nr": 3 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8943359440390058, - "sentence_nr": 3 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6239646156236577, - "sentence_nr": 3 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8782485779028959, - "sentence_nr": 3 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "openai/gpt-4o-mini", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9219735185328113, - "sentence_nr": 3 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8379214027434272, - "sentence_nr": 3 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9171135147465285, - "sentence_nr": 3 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8793006100154936, - "sentence_nr": 3 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6764135013792538, - "sentence_nr": 3 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8320911917964368, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8320911917964368, - "sentence_nr": 3 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9020259333664543, - "sentence_nr": 3 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8443316591536836, - "sentence_nr": 3 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9062739514559724, - "sentence_nr": 3 + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9243814194896306, - "sentence_nr": 3 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9257122714800141, - "sentence_nr": 3 + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9048929676970495, - "sentence_nr": 3 + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9233238051356927, - "sentence_nr": 3 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8961117810241208, - "sentence_nr": 3 + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9137011072166213, - "sentence_nr": 3 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9144918070375806, - "sentence_nr": 3 + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9447475462972004, - "sentence_nr": 3 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9418568225974095, - "sentence_nr": 3 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8631885674989124, - "sentence_nr": 3 + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9540570534869818, - "sentence_nr": 3 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9356691952085903, - "sentence_nr": 3 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8263666332486633, - "sentence_nr": 3 + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9187937618702817, - "sentence_nr": 3 + "model": "openai/gpt-4o-mini", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6492261286778312, - "sentence_nr": 4 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4782990117524071, - "sentence_nr": 4 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8728890059382535, - "sentence_nr": 4 + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7924841060781368, - "sentence_nr": 4 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8728890059382535, - "sentence_nr": 4 + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8085699807438939, - "sentence_nr": 4 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9309167160514913, - "sentence_nr": 4 + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8335210974928002, - "sentence_nr": 4 + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9407617520385465, - "sentence_nr": 4 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9009704508776215, - "sentence_nr": 4 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.886161550229872, - "sentence_nr": 4 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8864780713525466, - "sentence_nr": 4 + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8619950335517561, - "sentence_nr": 4 + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.877644990158928, - "sentence_nr": 4 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9473578431592224, - "sentence_nr": 4 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8989284887461744, - "sentence_nr": 4 + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8982857165205713, - "sentence_nr": 4 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9421743042333945, - "sentence_nr": 4 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.909430339396572, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9114715597392106, - "sentence_nr": 5 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221676855227006, - "sentence_nr": 5 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.903310364652346, - "sentence_nr": 5 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.43631872104818037, - "sentence_nr": 5 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.725100223395414, - "sentence_nr": 5 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8342041754812477, - "sentence_nr": 5 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7541096773855238, - "sentence_nr": 5 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9352893606252747, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7137044016250488, - "sentence_nr": 5 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8459329201101423, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9155785169978052, - "sentence_nr": 5 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.454243405917021, - "sentence_nr": 5 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4367071875067552, - "sentence_nr": 5 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9053865214400596, - "sentence_nr": 5 + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9344907300105301, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.941467473244312, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8759462570863868, - "sentence_nr": 5 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9116059567890715, - "sentence_nr": 5 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95453015576562, - "sentence_nr": 5 + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9271804273091313, - "sentence_nr": 5 + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9494380676747487, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8907525765155897, - "sentence_nr": 5 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9420326057327402, - "sentence_nr": 5 + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8729192735278123, - "sentence_nr": 5 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.840210783941434, - "sentence_nr": 5 + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8830406923187026, - "sentence_nr": 5 + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8705872791986208, - "sentence_nr": 5 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9129896861855028, - "sentence_nr": 5 + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9775140091004713, - "sentence_nr": 6 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.931908394385036, - "sentence_nr": 6 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.958499216692883, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9787648208394673, - "sentence_nr": 6 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8848447424869419, - "sentence_nr": 6 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9476480635849643, - "sentence_nr": 6 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8420296194650692, - "sentence_nr": 6 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9645398026978572, - "sentence_nr": 6 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.976975965491712, - "sentence_nr": 6 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9575751193892209, - "sentence_nr": 6 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 6 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.922108923148009, - "sentence_nr": 6 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9367021384173281, - "sentence_nr": 6 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 6 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9136709169732016, - "sentence_nr": 6 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9031487241080103, - "sentence_nr": 6 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.922108923148009, - "sentence_nr": 6 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9717329164232313, - "sentence_nr": 6 + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9810420842974353, - "sentence_nr": 6 + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9296061535584738, - "sentence_nr": 6 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9548717794727779, - "sentence_nr": 6 + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9723617284409432, - "sentence_nr": 6 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9433216405879152, - "sentence_nr": 6 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9253992588631311, - "sentence_nr": 6 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7833761650543694, - "sentence_nr": 6 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8958698547783525, - "sentence_nr": 6 + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9659983030155975, - "sentence_nr": 6 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9368374793769542, - "sentence_nr": 6 + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9292848975349729, - "sentence_nr": 6 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9705333075369675, - "sentence_nr": 6 + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9560908971572966, - "sentence_nr": 6 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9288860917142431, - "sentence_nr": 6 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9402643484548583, - "sentence_nr": 6 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9303023646781129, - "sentence_nr": 6 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9076656012518489, - "sentence_nr": 6 + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272618174968876, - "sentence_nr": 6 + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630829363546703, - "sentence_nr": 6 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9437691960187881, - "sentence_nr": 6 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9233897890679653, - "sentence_nr": 6 + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9217593594034571, - "sentence_nr": 6 + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9429459010031568, - "sentence_nr": 7 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9680340601535599, - "sentence_nr": 7 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9444947592571505, - "sentence_nr": 7 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9524237679532525, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8521740000505951, - "sentence_nr": 7 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9147273981117778, - "sentence_nr": 7 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9353915284262971, - "sentence_nr": 7 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9575256886848735, - "sentence_nr": 7 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9544425909905248, - "sentence_nr": 7 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.887089742205764, - "sentence_nr": 7 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8937272463225717, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221577416896909, - "sentence_nr": 7 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7360571605491374, - "sentence_nr": 7 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9158962896380519, - "sentence_nr": 7 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9016185053131788, - "sentence_nr": 7 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9519313199322048, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9002497361613263, - "sentence_nr": 7 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 7 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9404564646985731, - "sentence_nr": 7 + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9525612663771642, - "sentence_nr": 7 + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9416090102549223, - "sentence_nr": 7 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.946182450185975, - "sentence_nr": 7 + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8827665860178672, - "sentence_nr": 7 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9242269657430007, - "sentence_nr": 7 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9209375409360453, - "sentence_nr": 7 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9453162319718537, - "sentence_nr": 7 + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9354735336178899, - "sentence_nr": 7 + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9650606723493668, - "sentence_nr": 7 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.937172702008466, - "sentence_nr": 7 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9037456319061896, - "sentence_nr": 7 + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9527540439558733, - "sentence_nr": 7 + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9777992945719618, - "sentence_nr": 7 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9483614149601093, - "sentence_nr": 7 + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630476322301069, - "sentence_nr": 7 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9090634311284931, - "sentence_nr": 7 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9592439701684463, - "sentence_nr": 7 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9352813563171796, - "sentence_nr": 7 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9578898822826803, - "sentence_nr": 7 + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9349087092124988, - "sentence_nr": 7 + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9553475775967099, - "sentence_nr": 7 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9426144990998162, - "sentence_nr": 7 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9455357310467346, - "sentence_nr": 7 + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359599516797827, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8529883661830301, + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9313047211019367, + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9311406569876187, + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9392038901097501, + "model": "google/gemma-3-27b-it", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9504743930445531, + "model": "qwen/qwq-32b", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9283998656503502, + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9934034758807603, + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9131528589305679, + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9917679206284817, + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9566767123929576, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359924521743563, + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8893588081911743, + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9583698738001583, + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9934034758807603, + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.946392812169666, + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.911875333930421, + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9169315433407361, + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9541325707307038, + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9648123726963476, + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8370298547932784, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9334875203861144, + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9413496332501932, + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9956823103485622, + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9457390517164731, + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9671298665063969, + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9336521523423332, + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9502062892893858, + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9333019767772176, + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9037394051488277, + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9283644587512466, + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9237582925385585, + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8995566191566017, + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.896344147038989, + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.09821094254330615, + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9548273305811203, + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9251737690567995, + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275689564213165, + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272442008199501, - "sentence_nr": 9 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9520060001290835, - "sentence_nr": 9 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9058859200742604, - "sentence_nr": 9 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8789724147701462, - "sentence_nr": 9 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9137645544850267, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8969027357279203, - "sentence_nr": 9 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9520060001290835, - "sentence_nr": 9 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275374047069039, - "sentence_nr": 9 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8772309014828462, - "sentence_nr": 9 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9140052999897977, - "sentence_nr": 9 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.868350408637765, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7563541659131354, - "sentence_nr": 9 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8578315979157695, - "sentence_nr": 9 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8441075622700097, - "sentence_nr": 9 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.23829288001976573, - "sentence_nr": 9 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9407267756704489, - "sentence_nr": 9 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.831845583109951, - "sentence_nr": 9 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9530684796567226, - "sentence_nr": 9 + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8984174935165463, - "sentence_nr": 9 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.946008414943598, - "sentence_nr": 9 + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9285885624039975, - "sentence_nr": 9 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9645189965938258, - "sentence_nr": 9 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9601667560566091, - "sentence_nr": 9 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9113133701465544, - "sentence_nr": 9 + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9363094557613988, - "sentence_nr": 9 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9499594621802195, - "sentence_nr": 9 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8850558582872771, - "sentence_nr": 9 + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9413520522974334, - "sentence_nr": 9 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8953760832780698, - "sentence_nr": 9 + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9516191368774216, - "sentence_nr": 9 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.873135905690596, - "sentence_nr": 9 + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9331628274049639, - "sentence_nr": 9 + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9350921637704382, - "sentence_nr": 9 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9398175409358328, - "sentence_nr": 9 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9675093986501344, - "sentence_nr": 9 + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", + "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9282207391671503, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.957452925924953, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8937237551170429, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-lite-001", + "model": "google/gemma-3-27b-it", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "qwen/qwq-32b", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256331955884847, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.904390835311888, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8995954000535624, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.1-70b-instruct", + "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.928962868887516, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-small-24b-instruct-2501", + "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9339798045072082, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", + "model": "mistralai/mistral-small-3.1-24b-instruct", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8008809042180175, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-lite-001", + "model": "google/gemma-3-27b-it", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", + "model": "qwen/qwq-32b", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240001424211951, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", + "model": "deepseek/deepseek-chat-v3-0324", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.3493344613894351, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.92829327413418, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359271530286619, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9641555435524619, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.90719289051837, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8543701176038877, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9045960456690756, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9576659929734302, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9445842802137389, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917893569547509, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9031282594956593, - "sentence_nr": 10 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9325823323160847, - "sentence_nr": 10 + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9171277146973622, - "sentence_nr": 10 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9631220314707449, - "sentence_nr": 10 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9125575210703364, - "sentence_nr": 10 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9671298665063969, - "sentence_nr": 10 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8583796678495444, - "sentence_nr": 10 + "model": "google/gemma-3-27b-it", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 10 + "model": "qwen/qwq-32b", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9075511178990168, - "sentence_nr": 10 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8942877287874674, - "sentence_nr": 10 + "model": "amazon/nova-micro-v1", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8881782096383685, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8452994228892592, - "sentence_nr": 11 + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.857664755026069, - "sentence_nr": 11 + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7687402404428638, - "sentence_nr": 11 + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9425182378610694, - "sentence_nr": 11 + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8221659843346086, - "sentence_nr": 11 + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8685375697135141, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7860944644568774, - "sentence_nr": 11 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7829829019188287, - "sentence_nr": 11 + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9425182378610694, - "sentence_nr": 11 + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.289269703803095, - "sentence_nr": 11 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7829829019188287, - "sentence_nr": 11 + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9425182378610694, - "sentence_nr": 11 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.915813486906383, - "sentence_nr": 11 + "model": "amazon/nova-micro-v1", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-4-maverick", "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.928671169616198, - "sentence_nr": 11 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9195852720074569, - "sentence_nr": 11 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9260563505342738, - "sentence_nr": 11 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8580715674095071, - "sentence_nr": 11 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8991782906832555, - "sentence_nr": 11 + "model": "google/gemma-3-27b-it", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "qwen/qwq-32b", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9549429726485847, - "sentence_nr": 11 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8571447284090962, - "sentence_nr": 11 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.953599772014362, - "sentence_nr": 11 + "model": "amazon/nova-micro-v1", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9382091007325469, - "sentence_nr": 11 + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9125682774652475, - "sentence_nr": 11 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9084959093441131, - "sentence_nr": 11 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "google/gemma-3-27b-it", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 + "model": "qwen/qwq-32b", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9389584881035126, - "sentence_nr": 11 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8968120926569282, - "sentence_nr": 11 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8757339860702672, - "sentence_nr": 11 + "model": "amazon/nova-micro-v1", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9742989957563788, - "sentence_nr": 12 + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9854564066904739, - "sentence_nr": 12 + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.938338375356983, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9363458435045497, - "sentence_nr": 12 + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275189832478317, - "sentence_nr": 12 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9680610688075657, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9458276502828801, - "sentence_nr": 12 + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9555270393882619, - "sentence_nr": 12 + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.774972667720128, - "sentence_nr": 12 + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9238483556315539, - "sentence_nr": 12 + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9292605756517186, - "sentence_nr": 12 + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8710905917506855, - "sentence_nr": 12 + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8877998658561537, - "sentence_nr": 12 + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9408832971568818, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8758560882945217, - "sentence_nr": 12 + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9047504210526172, - "sentence_nr": 12 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9149458726191051, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9497380252636716, - "sentence_nr": 12 + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9247145535687903, - "sentence_nr": 12 + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 12 + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8829314518141973, - "sentence_nr": 12 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9497380252636716, - "sentence_nr": 12 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9369900232316837, - "sentence_nr": 12 + "model": "amazon/nova-micro-v1", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9584772514045287, - "sentence_nr": 12 + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9656526051593539, - "sentence_nr": 12 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9262800142753679, - "sentence_nr": 12 + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9178799098053634, - "sentence_nr": 12 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemini-2.0-flash-001", "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8988056403515298, - "sentence_nr": 12 + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240902217687106, - "sentence_nr": 12 + "model": "google/gemma-3-27b-it", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 + "model": "qwen/qwq-32b", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9454713149117651, - "sentence_nr": 12 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9457650793019858, - "sentence_nr": 12 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9115531547253959, - "sentence_nr": 12 + "model": "amazon/nova-micro-v1", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9403725471773088, - "sentence_nr": 12 + "model": "openai/gpt-4o-mini", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9107758326980321, - "sentence_nr": 12 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9251111872988325, - "sentence_nr": 12 + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9267004903727016, - "sentence_nr": 12 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9652440580136615, - "sentence_nr": 12 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.924254800539438, - "sentence_nr": 12 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9054967244578502, - "sentence_nr": 12 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.886673201587762, - "sentence_nr": 12 + "model": "google/gemma-3-27b-it", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9082204179924286, - "sentence_nr": 12 + "model": "qwen/qwq-32b", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665046359304257, - "sentence_nr": 12 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9492870842156111, - "sentence_nr": 12 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 + "model": "amazon/nova-micro-v1", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9495327576081029, - "sentence_nr": 12 + "model": "openai/gpt-4o-mini", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9605742681789634, - "sentence_nr": 13 + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9410712595774171, - "sentence_nr": 13 + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.971921146040729, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8360964435901039, - "sentence_nr": 13 + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9278436686065653, - "sentence_nr": 13 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9540941235545723, - "sentence_nr": 13 + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7765803419515074, - "sentence_nr": 13 + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9368660209060221, - "sentence_nr": 13 + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9179315685239186, - "sentence_nr": 13 + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9198867501155861, - "sentence_nr": 13 + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9357668560693397, - "sentence_nr": 13 + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.880651835588671, - "sentence_nr": 13 + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9322025130978147, - "sentence_nr": 13 + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8666701669384438, - "sentence_nr": 13 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9301584319196643, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9201441893603447, - "sentence_nr": 13 + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4518476286184633, - "sentence_nr": 13 + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8817151383770689, - "sentence_nr": 13 + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "model": "openai/gpt-4o-mini", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9719892276800867, - "sentence_nr": 13 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9232252378020026, - "sentence_nr": 13 + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.90340499273861, - "sentence_nr": 13 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9445601279006905, - "sentence_nr": 13 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9284637794790105, - "sentence_nr": 13 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9506720475284802, - "sentence_nr": 13 + "model": "google/gemma-3-27b-it", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "model": "qwen/qwq-32b", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9650672132857259, - "sentence_nr": 13 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.935825271074837, - "sentence_nr": 13 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9417006532894496, - "sentence_nr": 13 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9180957642017807, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9336273124319283, - "sentence_nr": 13 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9199623581249377, - "sentence_nr": 13 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9420383150390214, - "sentence_nr": 13 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9169222881606529, - "sentence_nr": 13 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9358954768171188, - "sentence_nr": 13 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9210475526688618, - "sentence_nr": 13 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.900422383617428, - "sentence_nr": 13 + "model": "google/gemma-3-27b-it", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665042848270522, - "sentence_nr": 13 + "model": "qwen/qwq-32b", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9195975724156285, - "sentence_nr": 13 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9482591669689567, - "sentence_nr": 13 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.894400898846725, - "sentence_nr": 13 + "model": "amazon/nova-micro-v1", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9683895601588671, - "sentence_nr": 14 + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.974733551222386, - "sentence_nr": 14 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 14 + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.935724475087967, - "sentence_nr": 14 + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.945278116491169, - "sentence_nr": 14 + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.913976993531483, - "sentence_nr": 14 + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9206503738833902, - "sentence_nr": 14 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8879551150411227, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9093507960484853, - "sentence_nr": 14 + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.44325871778061554, - "sentence_nr": 14 + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8629899790604912, - "sentence_nr": 14 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8839868610728687, - "sentence_nr": 14 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9659019608247615, - "sentence_nr": 14 + "model": "amazon/nova-micro-v1", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "model": "openai/gpt-4o-mini", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9612040783142544, - "sentence_nr": 14 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9355702448711621, - "sentence_nr": 14 + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8575724679460186, - "sentence_nr": 14 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.919154316989783, - "sentence_nr": 14 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9107041155041439, - "sentence_nr": 14 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8860042875765471, - "sentence_nr": 14 + "model": "google/gemma-3-27b-it", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "model": "qwen/qwq-32b", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9163443895096822, - "sentence_nr": 14 + "model": "amazon/nova-micro-v1", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "model": "openai/gpt-4o-mini", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9513360683724416, - "sentence_nr": 14 + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9506442510575418, - "sentence_nr": 14 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9210869399305139, - "sentence_nr": 14 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8602965545640948, - "sentence_nr": 14 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8912610518101419, - "sentence_nr": 14 + "model": "google/gemma-3-27b-it", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.857937519719319, - "sentence_nr": 14 + "model": "qwen/qwq-32b", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9528771181894694, - "sentence_nr": 14 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9241995664234885, - "sentence_nr": 14 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432104991415542, - "sentence_nr": 14 + "model": "amazon/nova-micro-v1", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8927784164557715, - "sentence_nr": 14 + "model": "openai/gpt-4o-mini", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8681309346882299, - "sentence_nr": 15 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9045257596276787, - "sentence_nr": 15 + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7552111299277484, - "sentence_nr": 15 + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.82396628763246, - "sentence_nr": 15 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 15 + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8809116426093319, - "sentence_nr": 15 + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9069369532463243, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8255413975339149, - "sentence_nr": 15 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9116712045344968, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8855094439275503, - "sentence_nr": 15 + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8809116426093319, - "sentence_nr": 15 + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5085021700346579, - "sentence_nr": 15 + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8155954216287978, - "sentence_nr": 15 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8849766832597384, - "sentence_nr": 15 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9321985099431636, - "sentence_nr": 15 + "model": "amazon/nova-micro-v1", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9158869153954171, - "sentence_nr": 15 + "model": "openai/gpt-4o-mini", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8940299169999223, - "sentence_nr": 15 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9029209331114941, - "sentence_nr": 15 + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9434784706316768, - "sentence_nr": 15 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9504499063681887, - "sentence_nr": 15 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8103402263404181, - "sentence_nr": 15 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "model": "google/gemma-3-27b-it", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9033542015144801, - "sentence_nr": 15 + "model": "qwen/qwq-32b", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8920851535963175, - "sentence_nr": 15 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9012698346023688, - "sentence_nr": 15 + "model": "amazon/nova-micro-v1", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "model": "openai/gpt-4o-mini", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8815241253287673, - "sentence_nr": 15 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.955434974676454, - "sentence_nr": 15 + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9190034267575142, - "sentence_nr": 15 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9028341607528202, - "sentence_nr": 15 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "google/gemma-3-27b-it", "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7933760889502307, - "sentence_nr": 15 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "model": "qwen/qwq-32b", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9669111778196173, - "sentence_nr": 15 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9183552099282611, - "sentence_nr": 15 + "model": "amazon/nova-micro-v1", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9213964969470535, - "sentence_nr": 15 + "model": "openai/gpt-4o-mini", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9438561056375272, - "sentence_nr": 16 + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9245427558640842, - "sentence_nr": 16 + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9466217999433078, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8600910973378976, - "sentence_nr": 16 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5465479162881712, - "sentence_nr": 16 + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.908088143295894, - "sentence_nr": 16 + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8689979953554426, - "sentence_nr": 16 + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8837997874830685, - "sentence_nr": 16 + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9430526976186369, - "sentence_nr": 16 + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7213258253735133, - "sentence_nr": 16 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8583796678495444, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5352913894873965, - "sentence_nr": 16 + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7506613813658406, - "sentence_nr": 16 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9120029292560927, - "sentence_nr": 16 + "model": "amazon/nova-micro-v1", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.969258616291086, - "sentence_nr": 16 + "model": "openai/gpt-4o-mini", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359933426460225, - "sentence_nr": 16 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8446197069920836, - "sentence_nr": 16 + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665537794677691, - "sentence_nr": 16 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7519024768911576, - "sentence_nr": 16 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9419599049218603, - "sentence_nr": 16 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9100379761498075, - "sentence_nr": 16 + "model": "google/gemma-3-27b-it", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9243062555931161, - "sentence_nr": 16 + "model": "qwen/qwq-32b", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9232535952320629, - "sentence_nr": 16 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9430158926147498, - "sentence_nr": 16 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8923268998495886, - "sentence_nr": 16 + "model": "amazon/nova-micro-v1", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9316958873367511, - "sentence_nr": 16 + "model": "openai/gpt-4o-mini", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9441083273271286, - "sentence_nr": 16 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899852954654377, - "sentence_nr": 16 + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9451690574618664, - "sentence_nr": 16 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9470556595464068, - "sentence_nr": 16 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8625414653847894, - "sentence_nr": 16 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8658510104009289, - "sentence_nr": 16 + "model": "google/gemma-3-27b-it", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.938651167013012, - "sentence_nr": 16 + "model": "qwen/qwq-32b", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9362303281043904, - "sentence_nr": 16 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "amazon/nova-micro-v1", "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", + "model": "openai/gpt-4o-mini", "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9288883358178652, - "sentence_nr": 16 + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7378741057437793, - "sentence_nr": 17 + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.898904151376881, - "sentence_nr": 17 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 17 + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8446522700991944, - "sentence_nr": 17 + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9053865214400596, - "sentence_nr": 17 + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8700885813654318, - "sentence_nr": 17 + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9331139325257429, - "sentence_nr": 17 + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8860497305091617, - "sentence_nr": 17 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8222704990602537, - "sentence_nr": 17 + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8402559609277754, - "sentence_nr": 17 + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7386088026745246, - "sentence_nr": 17 + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.06557474419143802, - "sentence_nr": 17 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8700885813654318, - "sentence_nr": 17 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "amazon/nova-micro-v1", + "bcp_47": "th", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "openai/gpt-4o-mini", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8771568927591851, - "sentence_nr": 17 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8869070241487921, - "sentence_nr": 17 + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8173012945645394, - "sentence_nr": 17 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "google/gemma-3-27b-it", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8220012279932035, - "sentence_nr": 17 + "model": "qwen/qwq-32b", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8449397341788647, - "sentence_nr": 17 + "model": "amazon/nova-micro-v1", + "bcp_47": "kn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9239069749524619, - "sentence_nr": 17 + "model": "openai/gpt-4o-mini", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8972504357155736, - "sentence_nr": 17 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6602446784708298, - "sentence_nr": 17 + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8667833154965509, - "sentence_nr": 17 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7306831212016971, - "sentence_nr": 17 + "model": "google/gemma-3-27b-it", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7306831212016971, - "sentence_nr": 17 + "model": "qwen/qwq-32b", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7406377967705062, - "sentence_nr": 17 + "model": "amazon/nova-micro-v1", + "bcp_47": "ml", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8509760908759664, - "sentence_nr": 17 + "model": "openai/gpt-4o-mini", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.96926930549605, - "sentence_nr": 18 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8641726957145408, - "sentence_nr": 18 + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9637804258017773, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240863542577373, - "sentence_nr": 18 + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9450374119495017, - "sentence_nr": 18 + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.96926930549605, - "sentence_nr": 18 + "bcp_47": "or", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 18 + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9392663489644577, - "sentence_nr": 18 + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8389799674466019, - "sentence_nr": 18 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9253208187778743, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221577416896909, - "sentence_nr": 18 + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221577416896909, - "sentence_nr": 18 + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 18 + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9144266092886102, - "sentence_nr": 18 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9500117624130617, - "sentence_nr": 18 + "model": "amazon/nova-micro-v1", + "bcp_47": "pl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "model": "openai/gpt-4o-mini", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.905862662289465, - "sentence_nr": 18 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9003734503251455, - "sentence_nr": 18 + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.858544407149412, - "sentence_nr": 18 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9281598514152588, - "sentence_nr": 18 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.948121913854874, - "sentence_nr": 18 + "model": "google/gemma-3-27b-it", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9629589146416885, - "sentence_nr": 18 + "model": "qwen/qwq-32b", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9580736862318411, - "sentence_nr": 18 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9708835294542548, - "sentence_nr": 18 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9234823141384267, - "sentence_nr": 18 + "model": "amazon/nova-micro-v1", + "bcp_47": "ha", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9939521304203686, - "sentence_nr": 18 + "model": "openai/gpt-4o-mini", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9474838221026617, - "sentence_nr": 18 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9488355997601815, - "sentence_nr": 18 + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424390135303181, - "sentence_nr": 18 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9711070259637357, - "sentence_nr": 18 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9237920416869381, - "sentence_nr": 18 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8942780008373756, - "sentence_nr": 18 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8468261925085733, - "sentence_nr": 18 + "model": "google/gemma-3-27b-it", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8976119317111001, - "sentence_nr": 18 + "model": "qwen/qwq-32b", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9527352893094178, - "sentence_nr": 18 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9510981354135275, - "sentence_nr": 18 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9521144628004171, - "sentence_nr": 18 + "model": "amazon/nova-micro-v1", + "bcp_47": "sd", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 + "model": "openai/gpt-4o-mini", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9774592733638915, - "sentence_nr": 19 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665042848270522, - "sentence_nr": 19 + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9806060444395596, - "sentence_nr": 19 + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9049668032095894, - "sentence_nr": 19 + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665042848270522, - "sentence_nr": 19 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9667317239059525, - "sentence_nr": 19 + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 19 + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9159800198090925, - "sentence_nr": 19 + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9667317239059525, - "sentence_nr": 19 + "bcp_47": "ms", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8925738398388144, - "sentence_nr": 19 + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9058585844143391, - "sentence_nr": 19 + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8888787903169728, - "sentence_nr": 19 + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8925738398388144, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9128855680689195, - "sentence_nr": 19 + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272821491047395, - "sentence_nr": 19 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9413354408985303, - "sentence_nr": 19 + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.46619006556188114, - "sentence_nr": 19 + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.913896513382741, - "sentence_nr": 19 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9058585844143391, - "sentence_nr": 19 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9557922260754473, - "sentence_nr": 19 + "model": "amazon/nova-micro-v1", + "bcp_47": "my", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9391656780027514, - "sentence_nr": 19 + "model": "openai/gpt-4o-mini", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9260113686541587, - "sentence_nr": 19 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9419307613884336, - "sentence_nr": 19 + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9845996986850503, - "sentence_nr": 19 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9255228522887315, - "sentence_nr": 19 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.889174440461237, - "sentence_nr": 19 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9496761617043387, - "sentence_nr": 19 + "model": "google/gemma-3-27b-it", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9322360743819351, - "sentence_nr": 19 + "model": "qwen/qwq-32b", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.935492418630274, - "sentence_nr": 19 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9456325305487512, - "sentence_nr": 19 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9325466173278317, - "sentence_nr": 19 + "model": "amazon/nova-micro-v1", + "bcp_47": "am", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "model": "openai/gpt-4o-mini", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240800356922247, - "sentence_nr": 19 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9361690788124847, - "sentence_nr": 19 + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.938043640398588, - "sentence_nr": 19 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.901373116210745, - "sentence_nr": 19 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9531605377803356, - "sentence_nr": 19 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9132591460407243, - "sentence_nr": 19 + "model": "google/gemma-3-27b-it", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "model": "qwen/qwq-32b", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9494481589794223, - "sentence_nr": 19 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9415361564397403, - "sentence_nr": 19 + "model": "amazon/nova-micro-v1", + "bcp_47": "om", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.897450557161678, - "sentence_nr": 19 + "model": "openai/gpt-4o-mini", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.937002127196651, - "sentence_nr": 20 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9435408381256087, - "sentence_nr": 20 + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9421449698305296, - "sentence_nr": 20 + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9607456319189528, - "sentence_nr": 20 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "model": "google/gemma-3-27b-it", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "model": "qwen/qwq-32b", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5467617051776391, - "sentence_nr": 20 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "bcp_47": "bho", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8969209805167669, - "sentence_nr": 20 + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9060555921929084, - "sentence_nr": 20 + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8969209805167669, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9096430262961498, - "sentence_nr": 20 + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7983940190154283, - "sentence_nr": 20 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9132591460407243, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9204057102575467, - "sentence_nr": 20 + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4968312722246179, - "sentence_nr": 20 + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8001971757912975, - "sentence_nr": 20 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9204057102575467, - "sentence_nr": 20 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95112146871187, - "sentence_nr": 20 + "model": "amazon/nova-micro-v1", + "bcp_47": "uz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.949624286506194, - "sentence_nr": 20 + "model": "openai/gpt-4o-mini", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9588139991437585, - "sentence_nr": 20 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9236414681715879, - "sentence_nr": 20 + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9453633691396565, - "sentence_nr": 20 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9278367059866518, - "sentence_nr": 20 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9302237306555959, - "sentence_nr": 20 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8441460025255829, - "sentence_nr": 20 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "model": "google/gemma-3-27b-it", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9470556595464068, - "sentence_nr": 20 + "model": "qwen/qwq-32b", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.951863030034636, - "sentence_nr": 20 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8944443568631728, - "sentence_nr": 20 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9082204179924286, - "sentence_nr": 20 + "model": "amazon/nova-micro-v1", + "bcp_47": "az", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "model": "openai/gpt-4o-mini", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.90717359411325, - "sentence_nr": 20 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9014597856352894, - "sentence_nr": 20 + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9250084453288043, - "sentence_nr": 20 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95462554022758, - "sentence_nr": 20 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9318340131711181, - "sentence_nr": 20 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9736147802901586, - "sentence_nr": 20 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9182449217144187, - "sentence_nr": 20 + "model": "google/gemma-3-27b-it", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9400180064454685, - "sentence_nr": 20 + "model": "qwen/qwq-32b", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9255769217104873, - "sentence_nr": 20 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9309426923102619, - "sentence_nr": 20 + "model": "amazon/nova-micro-v1", + "bcp_47": "su", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9002012094811458, - "sentence_nr": 20 + "model": "openai/gpt-4o-mini", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9690017425712892, - "sentence_nr": 21 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6924365679057801, - "sentence_nr": 21 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.804543317337012, - "sentence_nr": 21 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8385395593542468, - "sentence_nr": 21 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9515560914045473, - "sentence_nr": 21 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.854435717190483, - "sentence_nr": 21 + "bcp_47": "nl", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7570244995532351, - "sentence_nr": 21 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6676892344393273, - "sentence_nr": 21 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.873135905690596, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6885773376269438, - "sentence_nr": 21 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.46961217063286037, - "sentence_nr": 21 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8379214027434272, - "sentence_nr": 21 + "model": "google/gemma-3-27b-it", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7570244995532351, - "sentence_nr": 21 + "model": "qwen/qwq-32b", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.20981645725460496, - "sentence_nr": 21 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6659995521111991, - "sentence_nr": 21 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7570244995532351, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8388678282825207, - "sentence_nr": 21 + "bcp_47": "ary", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9180596829241628, - "sentence_nr": 21 + "model": "openai/gpt-4o-mini", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9166274634412449, - "sentence_nr": 21 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8626786769008709, - "sentence_nr": 21 + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7991709881281639, - "sentence_nr": 21 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8119656541607598, - "sentence_nr": 21 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "model": "google/gemma-3-27b-it", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8872308158649556, - "sentence_nr": 21 + "model": "qwen/qwq-32b", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8914910756561332, - "sentence_nr": 21 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "model": "amazon/nova-micro-v1", + "bcp_47": "uk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.927494511055529, - "sentence_nr": 21 + "model": "openai/gpt-4o-mini", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9528614248210486, - "sentence_nr": 21 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8523282278495175, - "sentence_nr": 21 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9297633204435644, - "sentence_nr": 21 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "model": "google/gemma-3-27b-it", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "model": "qwen/qwq-32b", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9278042759794851, - "sentence_nr": 21 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8998995790099074, - "sentence_nr": 21 + "model": "amazon/nova-micro-v1", + "bcp_47": "yo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9302677881301988, - "sentence_nr": 21 + "model": "openai/gpt-4o-mini", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9522511234396616, - "sentence_nr": 22 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7585159184184324, - "sentence_nr": 22 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8922770448230282, - "sentence_nr": 22 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9126128133576369, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6245412677586388, - "sentence_nr": 22 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.868233862673363, - "sentence_nr": 22 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8852329532489643, - "sentence_nr": 22 + "bcp_47": "ig", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8378994642516495, - "sentence_nr": 22 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8775848642818888, - "sentence_nr": 22 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 22 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9448292727000915, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8555426729178464, - "sentence_nr": 22 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7558344174949267, - "sentence_nr": 22 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8497451239178159, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 22 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8199763712080639, - "sentence_nr": 22 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8206722459046871, - "sentence_nr": 22 + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.883570112979728, - "sentence_nr": 22 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.914786293186172, - "sentence_nr": 22 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "model": "amazon/nova-micro-v1", + "bcp_47": "ceb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8845568645036501, - "sentence_nr": 22 + "model": "openai/gpt-4o-mini", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8937192042814042, - "sentence_nr": 22 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.901348698020278, - "sentence_nr": 22 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8382013802825361, - "sentence_nr": 22 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9219786709510569, - "sentence_nr": 22 + "model": "google/gemma-3-27b-it", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8767649499531999, - "sentence_nr": 22 + "model": "qwen/qwq-32b", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9094880423990607, - "sentence_nr": 22 + "model": "amazon/nova-micro-v1", + "bcp_47": "awa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8719390074611821, - "sentence_nr": 22 + "model": "openai/gpt-4o-mini", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9349020382990011, - "sentence_nr": 22 + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272997117562144, - "sentence_nr": 22 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8962185446474815, - "sentence_nr": 22 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8930034245249151, - "sentence_nr": 22 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9271664513693498, - "sentence_nr": 22 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "model": "google/gemma-3-27b-it", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 + "model": "qwen/qwq-32b", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8936606750264663, - "sentence_nr": 22 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8803360259381345, - "sentence_nr": 22 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8680210960657176, - "sentence_nr": 22 + "model": "amazon/nova-micro-v1", + "bcp_47": "mg", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7931982206364059, - "sentence_nr": 23 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9438398456065387, - "sentence_nr": 23 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9281186022380125, - "sentence_nr": 23 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 + "bcp_47": "ro", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9184823166209557, - "sentence_nr": 23 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8839868610728687, - "sentence_nr": 23 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8884834862973964, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9300073119656489, - "sentence_nr": 23 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9381606131991436, - "sentence_nr": 23 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8839868610728687, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9300073119656489, - "sentence_nr": 23 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4072337657555589, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9184823166209557, - "sentence_nr": 23 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9293646790023864, - "sentence_nr": 23 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9511392272878579, - "sentence_nr": 23 + "model": "amazon/nova-micro-v1", + "bcp_47": "ne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9293879632586071, - "sentence_nr": 23 + "model": "openai/gpt-4o-mini", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9277950353049101, - "sentence_nr": 23 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8843378183459343, - "sentence_nr": 23 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8741633139531418, - "sentence_nr": 23 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9271525909282003, - "sentence_nr": 23 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9736840552120738, - "sentence_nr": 23 + "model": "google/gemma-3-27b-it", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9396084767892234, - "sentence_nr": 23 + "model": "qwen/qwq-32b", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9187563342696414, - "sentence_nr": 23 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8896752045577786, - "sentence_nr": 23 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9737097349915758, - "sentence_nr": 23 + "model": "amazon/nova-micro-v1", + "bcp_47": "mai", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9330058893011377, - "sentence_nr": 23 + "model": "openai/gpt-4o-mini", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9570066548501687, - "sentence_nr": 23 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9201684039669155, - "sentence_nr": 23 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9133901345922595, - "sentence_nr": 23 + "model": "google/gemma-3-27b-it", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 + "model": "qwen/qwq-32b", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9458636432813123, - "sentence_nr": 23 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917857433142856, - "sentence_nr": 23 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9375412439691305, - "sentence_nr": 23 + "model": "amazon/nova-micro-v1", + "bcp_47": "as", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9556267474396976, - "sentence_nr": 24 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9489054429933926, - "sentence_nr": 24 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9489054429933926, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8327628422929998, - "sentence_nr": 24 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9249365863966041, - "sentence_nr": 24 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.922528755167094, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9486938895906879, - "sentence_nr": 24 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 24 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8620685016584069, - "sentence_nr": 24 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9405916043682414, - "sentence_nr": 24 + "bcp_47": "ny", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9327915990783561, - "sentence_nr": 24 + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.909738029095061, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.909738029095061, - "sentence_nr": 24 + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8707492337114523, - "sentence_nr": 24 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95883735444933, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9455007606735264, - "sentence_nr": 24 + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9372630850025364, - "sentence_nr": 24 + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5983897920478856, - "sentence_nr": 24 + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9299762198228243, - "sentence_nr": 24 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9302303599426779, - "sentence_nr": 24 + "model": "amazon/nova-micro-v1", + "bcp_47": "so", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 + "model": "openai/gpt-4o-mini", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9544609413449265, - "sentence_nr": 24 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9355306533611718, - "sentence_nr": 24 + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432457481338326, - "sentence_nr": 24 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9472285181144658, - "sentence_nr": 24 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.923828763793418, - "sentence_nr": 24 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9224761498105726, - "sentence_nr": 24 + "model": "google/gemma-3-27b-it", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9756278595118478, - "sentence_nr": 24 + "model": "qwen/qwq-32b", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9499594621802195, - "sentence_nr": 24 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9544238060448419, - "sentence_nr": 24 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9012364553153411, - "sentence_nr": 24 + "model": "amazon/nova-micro-v1", + "bcp_47": "mag", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8199585012210312, - "sentence_nr": 24 + "model": "openai/gpt-4o-mini", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9280048312907723, - "sentence_nr": 24 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9587462450914201, - "sentence_nr": 24 + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8938919301593574, - "sentence_nr": 24 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9507758066685948, - "sentence_nr": 24 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432005035367906, - "sentence_nr": 24 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9675203656708941, - "sentence_nr": 24 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9303385434730891, - "sentence_nr": 24 + "model": "google/gemma-3-27b-it", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 + "model": "qwen/qwq-32b", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9401106918306472, - "sentence_nr": 24 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9533532275954528, - "sentence_nr": 24 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9274629860503822, - "sentence_nr": 24 + "model": "amazon/nova-micro-v1", + "bcp_47": "sr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8926908826740254, - "sentence_nr": 24 + "model": "openai/gpt-4o-mini", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6224897798032885, - "sentence_nr": 25 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7893575827661004, - "sentence_nr": 25 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 25 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9478696521177714, - "sentence_nr": 25 + "bcp_47": "si", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7160421907140165, - "sentence_nr": 25 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6217685026572488, - "sentence_nr": 25 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.794919886900137, - "sentence_nr": 25 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8793006100154936, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5916523997385489, - "sentence_nr": 25 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4849269488253923, - "sentence_nr": 25 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7462718113811923, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8083701726292805, - "sentence_nr": 25 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.722502153449955, - "sentence_nr": 25 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 25 + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5544920599877754, - "sentence_nr": 25 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6853792233736985, - "sentence_nr": 25 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9200538056807258, - "sentence_nr": 25 + "model": "amazon/nova-micro-v1", + "bcp_47": "km", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630774769374594, - "sentence_nr": 25 + "model": "openai/gpt-4o-mini", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9143443086107108, - "sentence_nr": 25 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9052744049140443, - "sentence_nr": 25 + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9018850910676268, - "sentence_nr": 25 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9142574363760879, - "sentence_nr": 25 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9168431011517528, - "sentence_nr": 25 + "model": "google/gemma-3-27b-it", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9141901633008906, - "sentence_nr": 25 + "model": "qwen/qwq-32b", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "model": "amazon/nova-micro-v1", + "bcp_47": "hne", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9317477810881586, - "sentence_nr": 25 + "model": "openai/gpt-4o-mini", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9354759108346813, - "sentence_nr": 25 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9141453314674155, - "sentence_nr": 25 + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9550191440621234, - "sentence_nr": 25 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8402328635525613, - "sentence_nr": 25 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.831845583109951, - "sentence_nr": 25 + "model": "google/gemma-3-27b-it", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9092382099397807, - "sentence_nr": 25 + "model": "qwen/qwq-32b", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9770044719642067, - "sentence_nr": 25 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9155318202784664, - "sentence_nr": 25 + "model": "amazon/nova-micro-v1", + "bcp_47": "fuv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 + "model": "openai/gpt-4o-mini", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8272309965382391, - "sentence_nr": 26 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7767725512278205, - "sentence_nr": 26 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9373981486656514, - "sentence_nr": 26 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9579023880929557, - "sentence_nr": 26 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9268329536813669, - "sentence_nr": 26 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.904428807825769, - "sentence_nr": 26 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "bcp_47": "zu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9116613044583819, - "sentence_nr": 26 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9084279839455062, - "sentence_nr": 26 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8013174743750245, - "sentence_nr": 26 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.34811585804131506, - "sentence_nr": 26 + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8912610518101419, - "sentence_nr": 26 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9084279608664247, - "sentence_nr": 26 + "model": "amazon/nova-micro-v1", + "bcp_47": "kk", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "openai/gpt-4o-mini", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9342971539350323, - "sentence_nr": 26 + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9618018909441389, - "sentence_nr": 26 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221850850049388, - "sentence_nr": 26 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "google/gemma-3-27b-it", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "qwen/qwq-32b", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "amazon/nova-micro-v1", + "bcp_47": "cs", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "openai/gpt-4o-mini", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9621502301102783, - "sentence_nr": 26 + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9207497282487874, - "sentence_nr": 26 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8817316559043479, - "sentence_nr": 26 + "model": "google/gemma-3-27b-it", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "qwen/qwq-32b", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9595521389704431, - "sentence_nr": 26 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9604273088099046, - "sentence_nr": 26 + "model": "amazon/nova-micro-v1", + "bcp_47": "sv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8832167531630292, - "sentence_nr": 26 + "model": "openai/gpt-4o-mini", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9618018909441389, - "sentence_nr": 27 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9338423795983638, - "sentence_nr": 27 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8069582822584229, - "sentence_nr": 27 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432051372011929, - "sentence_nr": 27 + "bcp_47": "hu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8875472267363329, - "sentence_nr": 27 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8538919155402751, - "sentence_nr": 27 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8601111478550084, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8274840531521687, - "sentence_nr": 27 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8737243337458652, - "sentence_nr": 27 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8875472267363329, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8213297311895551, - "sentence_nr": 27 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.3007622907436899, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.906379768806771, - "sentence_nr": 27 + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8996352283472103, - "sentence_nr": 27 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8577239523880982, - "sentence_nr": 27 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9705288278234159, - "sentence_nr": 27 + "model": "amazon/nova-micro-v1", + "bcp_47": "el", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9022302698191352, - "sentence_nr": 27 + "model": "openai/gpt-4o-mini", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9618116705103616, - "sentence_nr": 27 + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9282902444420971, - "sentence_nr": 27 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9283062281157928, - "sentence_nr": 27 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9143841728614055, - "sentence_nr": 27 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9148205155364358, - "sentence_nr": 27 + "model": "google/gemma-3-27b-it", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9429357495928096, - "sentence_nr": 27 + "model": "qwen/qwq-32b", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8199038085123204, - "sentence_nr": 27 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9007500710615358, - "sentence_nr": 27 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9335504867261654, - "sentence_nr": 27 + "model": "amazon/nova-micro-v1", + "bcp_47": "sn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8519148326217993, - "sentence_nr": 27 + "model": "openai/gpt-4o-mini", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9467340802817513, - "sentence_nr": 27 + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8914166352994622, - "sentence_nr": 27 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8697448206881571, - "sentence_nr": 27 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9091527400737927, - "sentence_nr": 27 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "model": "google/gemma-3-27b-it", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9487286082082608, - "sentence_nr": 27 + "model": "qwen/qwq-32b", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9550331732946552, - "sentence_nr": 27 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "ckb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9703747509928279, - "sentence_nr": 28 + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9540941235545723, - "sentence_nr": 28 + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9543144589160125, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.652649628941592, - "sentence_nr": 28 + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9767775472269087, - "sentence_nr": 28 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9393628940364738, - "sentence_nr": 28 + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9410712595774171, - "sentence_nr": 28 + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6444379795256558, - "sentence_nr": 28 + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8964898605551818, - "sentence_nr": 28 + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9263597385884417, - "sentence_nr": 28 + "bcp_47": "rw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899546929868499, - "sentence_nr": 28 + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899546929868499, - "sentence_nr": 28 + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 28 + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.824741266541094, - "sentence_nr": 28 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 28 + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8283905649271065, - "sentence_nr": 28 + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 28 + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.881413837458117, - "sentence_nr": 28 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899546929868499, - "sentence_nr": 28 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9437940294094723, - "sentence_nr": 28 + "model": "amazon/nova-micro-v1", + "bcp_47": "wo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9269703177791706, - "sentence_nr": 28 + "model": "openai/gpt-4o-mini", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.944904344834561, - "sentence_nr": 28 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8479413107328494, - "sentence_nr": 28 + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9715595760527852, - "sentence_nr": 28 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8595969327963556, - "sentence_nr": 28 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9538713542813556, - "sentence_nr": 28 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8348508116391393, - "sentence_nr": 28 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9572462820044535, - "sentence_nr": 28 + "model": "google/gemma-3-27b-it", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 + "model": "qwen/qwq-32b", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9375119517314923, - "sentence_nr": 28 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9493167367596885, - "sentence_nr": 28 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 + "model": "amazon/nova-micro-v1", + "bcp_47": "aeb", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9344916654109876, - "sentence_nr": 28 + "model": "openai/gpt-4o-mini", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9849529115133767, - "sentence_nr": 28 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275259780895282, - "sentence_nr": 28 + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9276874028790393, - "sentence_nr": 28 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9473074618830379, - "sentence_nr": 28 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9460494618521745, - "sentence_nr": 28 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8923268998495886, - "sentence_nr": 28 + "model": "google/gemma-3-27b-it", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9455007606735264, - "sentence_nr": 28 + "model": "qwen/qwq-32b", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9526558782357073, - "sentence_nr": 28 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9338345156544289, - "sentence_nr": 28 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8995764072227389, - "sentence_nr": 28 + "model": "amazon/nova-micro-v1", + "bcp_47": "ilo", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9505226544098013, - "sentence_nr": 28 + "model": "openai/gpt-4o-mini", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630841609539229, - "sentence_nr": 29 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9451142647196181, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7510122845400926, - "sentence_nr": 29 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8090165300577936, - "sentence_nr": 29 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9543128468386116, - "sentence_nr": 29 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "bcp_47": "xh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.920197561569537, - "sentence_nr": 29 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8759929746436435, - "sentence_nr": 29 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8935424392990651, - "sentence_nr": 29 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7769676399488106, - "sentence_nr": 29 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8875472267363329, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8664932988313133, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.906379768806771, - "sentence_nr": 29 + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8001297194719582, - "sentence_nr": 29 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9048724843551281, - "sentence_nr": 29 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8947987168857687, - "sentence_nr": 29 + "model": "amazon/nova-micro-v1", + "bcp_47": "ti", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9489238765618674, - "sentence_nr": 29 + "model": "openai/gpt-4o-mini", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.945278116491169, - "sentence_nr": 29 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8989194854163256, - "sentence_nr": 29 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9061728639858796, - "sentence_nr": 29 + "model": "google/gemma-3-27b-it", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9501419212325259, - "sentence_nr": 29 + "model": "qwen/qwq-32b", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.891206254843651, - "sentence_nr": 29 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9331628274049639, - "sentence_nr": 29 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "model": "amazon/nova-micro-v1", + "bcp_47": "be", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "model": "openai/gpt-4o-mini", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9046319474149982, - "sentence_nr": 29 + "model": "meta-llama/llama-4-maverick", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9463095328863311, - "sentence_nr": 29 + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9085828484030862, - "sentence_nr": 29 + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "model": "mistralai/mistral-small-3.1-24b-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8856061163721227, - "sentence_nr": 29 + "model": "google/gemini-2.0-flash-001", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "model": "google/gemma-3-27b-it", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "model": "qwen/qwq-32b", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9661878700572512, - "sentence_nr": 29 + "model": "deepseek/deepseek-chat-v3-0324", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 }, { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.908669313428767, - "sentence_nr": 29 + "model": "amazon/nova-micro-v1", + "bcp_47": "lua", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 } ] } \ No newline at end of file