[ { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5679608237702286, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.746881923400435, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4438455475739657, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6320800718582147, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5894973558751632, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7562097956860054, "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3846086976522069, "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5835344719191324, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4804215535486392, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6694735319785804, "sentence_nr": 0 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2511517944602615, "sentence_nr": 0 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4484633445384819, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5820808184424484, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.73788733854976, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5749603738163459, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7240488251574404, "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5617561349997696, "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7132694856647042, "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2963216580569375, "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5101500486835966, "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.15317719477157257, "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.38800976493585004, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6001453932849357, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.762029391170019, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.30676942927198475, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4968492831219663, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.32063971770635635, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5206258401513325, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.39086127104761287, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6239956806265569, "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3020679767949182, "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5246291817407542, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.29261990846502584, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5207965578474395, "sentence_nr": 0 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.23343658187420896, "sentence_nr": 0 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5188968707275573, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.2920008662633279, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.47119207959541226, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.2596939072050362, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4394574387008692, "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4273817965049865, "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6016204186733703, "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.2777551012631926, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.49423240120783246, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.7964573357809173, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.8458636471716781, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.34633672321253084, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.5378805625051344, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.3582301850807646, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.5380305837807603, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.300740577257699, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.5272774705181614, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.3099603853356145, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.5209233176748354, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.35580399268816465, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.5392592206305507, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.39317381456022266, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.6026058740561834, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.48930936408255293, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.699085629239476, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.3963410285961713, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.613166190285915, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.44294247711132617, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.5915660675216782, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.3756985486608933, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.5991443770283833, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.5009456904181451, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.6893719644090858, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.18273944860385094, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.44261865187418153, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.2153742037697241, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.4581737688885401, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.3372953649368346, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.5482505380106469, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.28528905353056333, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.4885812318466243, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.2935204022158406, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.4867597973247361, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.2929684584911775, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.5038324436049059, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.4034224234291925, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.5736798834726872, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.1077205146963877, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.428338145564396, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.22327767951697297, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.4063556880747369, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.2572733200413211, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.4520014138562526, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.40311197004738203, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.5788525108956781, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.45313578977486535, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.6160993561903745, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.2651736858432996, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.4491383344282561, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.34545319957597864, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.5727052860304503, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.15815751066481462, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5152611872266766, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.12903696060775005, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.456225988032654, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.024459391267874976, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.12351824822447692, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.46822754470803873, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3465147345201782, "sentence_nr": 1 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.08516700886866406, "sentence_nr": 1 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4091252890943268, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.19194937906573872, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5477665664300843, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4370196290761142, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.20669086265781264, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5076721272198604, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.17630490037560695, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.48116430160978857, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4122750002638689, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.15412719160788987, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5010353699512481, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.12369892692249995, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.44549610902403686, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.12560672881768975, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4969560260291519, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.17077058518804336, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5022008374701596, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.10784756064735967, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4427230465401631, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.06735571462439276, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.38102852892512806, "sentence_nr": 1 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.42723260976616784, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.1694466724647263, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4902502031746037, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3532931581623198, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.175396614619324, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.49736499605529066, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.15154395847232716, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.46053919348995803, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4041678259311437, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.1290514243115152, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4766581477336301, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.08273178236238297, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.36399666460809255, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.12601482779921785, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.43595665254608706, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.40959087443621306, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.6348509381122925, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.08214106568089705, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.3969463877642616, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.0744904632040495, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.4111163205685468, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.12894104034845807, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.4486368934849452, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.10070927557742705, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.43718220262892105, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.0772718393063023, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.4203683137304257, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.0756907193511249, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.4138725093679467, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.21748353646757182, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.4462746462826943, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.4179644538349004, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.10505106462290037, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.4474870048911137, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.0009218289085545725, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.15653859793617866, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.43177798053127925, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.0891537192318598, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.3970634926176537, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.0950136506275681, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.4372017487229785, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.1259356760989446, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.44568274520971096, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.16322494183480127, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.4815584993817062, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.0904087252785689, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.41830513174690515, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.21351902664706998, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.5130443042033361, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.16269986423611488, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.06939838145153245, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.3371547585108182, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.1691386174483793, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.4920789340026317, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.14944432524273302, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.4972796478830659, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.09793316925795417, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.4297577431879659, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.38870674200492367, "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6484380084879691, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4923751299732868, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6853756490381199, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3996712647649035, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6353525755760105, "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5115346945020283, "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7037574715738644, "sentence_nr": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.017834618169115152, "sentence_nr": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.05927156798818119, "sentence_nr": 2 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.23904922011090457, "sentence_nr": 2 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3399292774084129, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6152980280400979, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8311281590297233, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.005449161724399305, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.026158029267484995, "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.24508104771894088, "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5725552336126134, "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.20801258614305904, "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.26703508536995574, "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.35315040956049437, "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.625895188503691, "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.11133996756497437, "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4410280353998367, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.17743299460161885, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.43071271897416463, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.16052654068024738, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.41580120868053494, "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.05963579607071745, "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.31139762378406344, "sentence_nr": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.006734847287559362, "sentence_nr": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.03408121951468736, "sentence_nr": 2 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.09880177230676102, "sentence_nr": 2 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3297638349619511, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.2377604053257556, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5662768009060447, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.22573408807826306, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5444672928195973, "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.10742716472890976, "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.42694859148910824, "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.14745870033404418, "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.475170637938921, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.21665407194210906, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.4344921442639243, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.37994652561206577, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.6464467277069994, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.09362261118571368, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.3452056942265759, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.18917620656425485, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.4346170232980484, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.420450507904553, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.6503146347305717, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.24894072982768842, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.5212235893093335, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.393613605227227, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.6492198447661237, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.21147734744561483, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.41020178654369294, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.2329856851831642, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.5405751250637106, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.41756686236967944, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.5616829345739638, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.38189567401226293, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.6154314825900052, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.2126707920684064, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.4659908460634765, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.23240102389974368, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.4973274282641141, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.17979384730979156, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.4177311931467539, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.1702602472176709, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.4366640707779677, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.933651069586263, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.9586507529693243, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.3816408219023713, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.5784105768028126, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.18398226639192106, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.37285010531146734, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.26958884543190903, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.5631664732610485, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.4005296397635166, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.6201785376974677, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.15956483578595942, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.425693420655628, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.2323385180696658, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.5019509292309764, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.22952177306405494, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.5279520952576137, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.3618488169166299, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.5708179622131996, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.1712766252338756, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.5225554962608486, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.2709079038456153, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.447458019441992, "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.38249626297768063, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.40976234193505356, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5806197937310393, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7346706700987636, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5793367580502561, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6502428441722727, "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4855332614117322, "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5299556742893647, "sentence_nr": 3 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.19940445989088915, "sentence_nr": 3 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.43164821827950184, "sentence_nr": 3 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2423441824135159, "sentence_nr": 3 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4429509373913047, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6242817472465665, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7056438934239434, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6064630666233242, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6752055521830945, "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5357110024227318, "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6365941772753647, "sentence_nr": 3 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 3 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.14790264259417688, "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.27159767590045303, "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4751132438608344, "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6849386986272349, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.08635800047213174, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.218109371254876, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.23386786214190372, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3682311523733465, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.11739521786077453, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.22090491782919655, "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.1892240568795935, "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.280413108453108, "sentence_nr": 3 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.11547518641061649, "sentence_nr": 3 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.25945846414490087, "sentence_nr": 3 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.20233074088759792, "sentence_nr": 3 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3746629492952356, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.40214612768560637, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.45128424593135114, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.37284875432797243, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.44888401040760956, "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0925329498915617, "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.2110486160692096, "sentence_nr": 3 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 3 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.12453389344594705, "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.141543757252386, "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.2594145364221844, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.6244631487487835, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.6931369519059803, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.25383339228798274, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.45896379476820603, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.17200767571780612, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.3723150838362789, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.1477219991186121, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.28685201698226354, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.3254455687469726, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.4474512036484817, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.23887527917609022, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.4120359948636439, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.580451128369423, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.728208634600343, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.3556521383601747, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.594830811413066, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.21629114799587432, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.3542320138389837, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.580451128369423, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.728208634600343, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.27405612859390877, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.4639958592456083, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.13004800471424346, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.28217142159025543, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.37821486365532614, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.4718665834023439, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.3699382260470039, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.4032851361478274, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.45167594566243024, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.5169677927619225, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.23386786214190372, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.3780009826926042, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.3925121365052661, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.47788592802001717, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.1423412184218882, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.2596718628394258, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.3572188192648703, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.45381175288762937, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.07425055521504613, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.18122341046764998, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.1978585723043446, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.3527599187160617, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.2523019529343173, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.4406369072888057, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.41072675483179805, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.5635589150380774, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.3883375900135818, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.4643731845106876, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.6242817472465665, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.7123666275414222, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.8003203203844999, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9453478043428296, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2246029757863831, "sentence_nr": 4 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5773502691896258, "sentence_nr": 4 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7999099314029202, "sentence_nr": 4 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6417603075499863, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7825422900366437, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8503171627677965, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.37709297891717664, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6881502501430368, "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.8003203203844999, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9453478043428296, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.629039349740581, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.629039349740581, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.629039349740581, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5581982021478125, "sentence_nr": 4 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.629039349740581, "sentence_nr": 4 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.652013511062815, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.629039349740581, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.629039349740581, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.629039349740581, "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.629039349740581, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.629039349740581, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.629039349740581, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.5881561248602009, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.40435987083533204, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.39858613265631837, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.4425973012069069, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.47160616105623426, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.5309982646782259, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.1892240568795935, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.6151179643430991, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.41238100267720657, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.39909989628767284, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.17181529671327242, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.5293474685884572, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.4429196299668147, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.3830425592586042, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.1667955161379731, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.5802683403568892, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.3237722713145643, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.7426638026175545, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.49342175914364256, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.4352628824108997, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.5116862201536014, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.33471616336068044, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.2865612242047131, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.6433813179203622, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.3598792258309727, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.5125809225356253, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.5539920925426138, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.5226572946586268, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.5073395824633415, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.29382595610734974, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.1667955161379731, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.5773664661124461, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7013062757071812, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9303769449292738, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2381658499765768, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.8492326635760689, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9063898435384111, "sentence_nr": 5 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5309354663044072, "sentence_nr": 5 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6756014232714684, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4529852871970908, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6941474239078328, "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.8522456714074852, "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9096914044088521, "sentence_nr": 5 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 5 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.9457416090031758, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9892952933418456, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.47410002229034043, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7538467008030766, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5087473540251254, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7647955332172516, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5087473540251254, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7647955332172516, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.47410002229034043, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7538467008030766, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4234885228074744, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7410180114887145, "sentence_nr": 5 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.47375069012411286, "sentence_nr": 5 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7543919667018285, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5738396574789242, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.798357133373606, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5738396574789242, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.798357133373606, "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.47410002229034043, "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7538467008030766, "sentence_nr": 5 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.47410002229034043, "sentence_nr": 5 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7538467008030766, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4234885228074744, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7410180114887145, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.47375069012411286, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7543919667018285, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.5091224918749461, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.7829685247145245, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.40276720463657734, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.6529271690805427, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.30188353873287377, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.6086565367747951, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.6026286934891149, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.8025775976044891, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.4596980088392874, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.713787745993602, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.3272712268138726, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.6272846474183881, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.2981792160679168, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.5788026000794341, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.7012294787544179, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.8478115719875968, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.21690365808279138, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.5384773678665918, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.25711386542134795, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.6088853751738869, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.5695988432761473, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.7516103467926585, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.5072784644062104, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.7361065921505279, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.38091370416670794, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.6438225861756911, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.5091224918749461, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.7202697992734389, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.6917901740466924, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.8479928839177578, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.5309354663044072, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.6990707992725005, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.1673872929477023, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.4506667273103674, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.6917901740466924, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.8479928839177578, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.3416581331218724, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.6578570934289981, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.4797543511401896, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.7240781310560407, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.6401876410870359, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.7526484951226097, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.33057129676705455, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.5669225664686625, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.6004981752197522, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.7697646564917222, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.8492326635760689, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.9027320255916917, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.30350690419450826, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.569133886912883, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.6834516951654327, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3263040636562357, "sentence_nr": 6 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7511573912724299, "sentence_nr": 6 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9453473543978153, "sentence_nr": 6 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5971070986250356, "sentence_nr": 6 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8874294965619517, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7511573912724299, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9453473543978153, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7511573912724299, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9453473543978153, "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7511573912724299, "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9453473543978153, "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7511573912724299, "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9453473543978153, "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7511573912724299, "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9453473543978153, "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.19464521962073492, "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5838790966762375, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3142665434344143, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6466526067220029, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3751840463233443, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6279894552667558, "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.19268479640608693, "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.551397074868541, "sentence_nr": 6 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.17470942957770763, "sentence_nr": 6 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5403400891349619, "sentence_nr": 6 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.19464521962073492, "sentence_nr": 6 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5763410052067085, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.37392149096896676, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6665214662145853, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.19464521962073492, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5838790966762375, "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.19464521962073492, "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5763410052067085, "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5460240376042262, "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.24343304284910333, "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6275577931282961, "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.28571962561926445, "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6431872581462166, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.6255340042200862, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.8724783049357475, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.48994561421713123, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.7411155087367244, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.5971070986250356, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.8874294965619517, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.4547900039222725, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.6541971428810075, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.23198210427894825, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.630711601223299, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.4831233610237384, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.7122562458056777, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.32263864160302524, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.6824395076981005, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.38305978177479755, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.6061131723054572, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.24047860794644352, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.58198979036704, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.7511573912724299, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.9453473543978153, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.42984824697674956, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.7289444696770301, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.4881010344921759, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.7317734491561229, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.5971070986250356, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.8874294965619517, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.3684981984538114, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.5606332518476288, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.4536404448264584, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.8020827133708689, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.4545091839935173, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.7166050399790445, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.3370129264673147, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.7096874943799061, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.1624355752882384, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.4952968469712617, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.4831233610237384, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.7807505267551733, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.5595205105615875, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.8322210048001876, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.3142665434344143, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.6466526067220029, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.3610544299180199, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.49125115898082056, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.4284945090100314, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.7164026439677106, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.8578928092681435, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.9422733087334002, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.42818224355402373, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.42105372680687736, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.7001171094008295, "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1582866049832572, "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.34487142413575794, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.15521606028436608, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.37645329404497957, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.12620429887108936, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.35580703793872603, "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.12872220631084524, "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.33602633953270183, "sentence_nr": 7 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.042121062429802174, "sentence_nr": 7 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.14281404499176092, "sentence_nr": 7 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.042575418285137674, "sentence_nr": 7 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.05173688961049459, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3045613775157565, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5275070803493389, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2734283774929853, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5252214120598302, "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.10203846572325131, "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.33381153680096753, "sentence_nr": 7 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 7 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.014935758919429663, "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.08106107745254391, "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.044304867337633724, "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.20806974344498103, "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.08860973467526746, "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3178004360288637, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.15268019045355535, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.41028757620299977, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.030860166165309233, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.1100250143829584, "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.21255327712152144, "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.43272151570555034, "sentence_nr": 7 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.01486609147288197, "sentence_nr": 7 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.13893773605583024, "sentence_nr": 7 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.06609667473412645, "sentence_nr": 7 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.26197209338359717, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.26064517697298795, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5092206110218525, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.1507980395794452, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4306039128585424, "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.1438459189500836, "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.30693371625402605, "sentence_nr": 7 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0979038733644086, "sentence_nr": 7 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.30211704738953993, "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.009624974244068071, "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.07318255686027669, "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.043420474648595074, "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.2884095690753619, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.13868172938464635, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.3094469764260441, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.11091252683001185, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.26607634610445896, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.18154954789336694, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.4557483776072868, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.1381751568911733, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.3121557499162649, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.20065115069964384, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.4084885616013531, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.12291219097556666, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.3448002180666873, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.20608572305725564, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.4704943905570542, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.085416483900781, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.2825804066750608, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.11452508920842025, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.3212742401272785, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.15478222669012726, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.3550584759508654, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.07875433150726119, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.2638954513805452, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.10734088848154077, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.33946796348247366, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.17795920517030017, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.41862955401967455, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.19388048412249795, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.44361702376789247, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.1237012344369667, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.33331866832253354, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.15589802574348086, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.37894206802233305, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.1948502778967486, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.35525815981538433, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.1618333627385132, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.3458746996740858, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.17393111207515277, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.39042812195808824, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.19064689695123957, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.36954921822756504, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.1785851272602057, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.3800733399524004, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.20113943179758872, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.5054929215592371, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.07088281524771703, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.1725752257112697, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.11901413329120636, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.2908877283991857, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.15593857496482408, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.3832822126692406, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.21107720643690867, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.43911506176829573, "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.13410301071131794, "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3942932268034351, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.30677064886592076, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5308555945242818, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1327526847508867, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.37850602486495205, "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.18405035438430847, "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4142901090120915, "sentence_nr": 8 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 8 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.061826017721563604, "sentence_nr": 8 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.08852681798207009, "sentence_nr": 8 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3583179111355935, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3857436691295343, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5750224388123065, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5920893212447781, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6925021521158101, "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.22478613858269392, "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.44348101018104913, "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.183687049781416, "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.351911486970854, "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5181825846579515, "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.17328174803055044, "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3178268797869574, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.286608441075188, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4579283646292802, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.25861130592298187, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.39452644092432093, "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.20379250618355427, "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.41085414309816914, "sentence_nr": 8 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.310679343206099, "sentence_nr": 8 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4471183729584148, "sentence_nr": 8 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 8 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.2334787866969297, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3621517589760531, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5866873582151947, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.46269559069048716, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.46872641361415845, "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.10434360980785336, "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3012789660952507, "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.13835317113453516, "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.16343842313572918, "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3986641525285075, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.30890092021323623, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.5553909583113487, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.1690979933029136, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.3751861276375209, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.2840563956846642, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.5110250591004448, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.19920413481788912, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.42537796926163113, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.20401796878756984, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.43317630453631556, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.2044887070217883, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.38471585132587544, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.2980504190448601, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.5101268920225042, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.9436043261706615, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.9880191679951993, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.18831933500600306, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.4318025704181776, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.21544027588567594, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.5040038440508637, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.26970223719007375, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.5172978597562362, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.30630098078522544, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.5439056051092116, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.19850842371858787, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.43584341835040474, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.20170335119323748, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.3541251997977811, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.10508106635796587, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.3182774828667731, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.2309552734743087, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.43975656978777905, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.23530033724858213, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.46208607300298377, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.37284027455688556, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.5528347504734102, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.2887308472548599, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.38846174119508314, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.15487293534817623, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.39293494862736383, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.21741853044139284, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.3535910166292039, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.33626819961829335, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.5466581859383387, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.32000331642122953, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.5480591855923784, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.21132630077912357, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.4175670766052166, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.13108369255325433, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.3929302741911199, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.20174045447955946, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.33729298835089516, "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.20972571494011877, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.395894071208527, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.35369375385786006, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.13087682931309413, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.19462952976787054, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.013538497707846785, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1570208067577934, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4113045280468524, "sentence_nr": 9 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.15082713742973322, "sentence_nr": 9 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3965911699770542, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.15471428129658016, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4580211317461481, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.18928475425929295, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4916060435820526, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.21940429389247643, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4343280866601455, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1824401863423467, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.36709433185688595, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3377854698776805, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.521201229892482, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.12475846123062707, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.27823340731817514, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.10759927692349745, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.21065794536310511, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.07843772989359644, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.1324578891826276, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.08163977068875294, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.09047502044256338, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.21669141850731985, "sentence_nr": 9 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.10322985794794913, "sentence_nr": 9 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.24491122482530842, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.11809057094812304, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.27930342777387007, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.21268444697113978, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3229997133764549, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.1475503033983142, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.22104108935973044, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.16434349396840395, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.28582614857210975, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.10085167559661873, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.23831215045289575, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.17543744527808774, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.28201016956553354, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.12274092982883021, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.3385513651938691, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.31017716089889963, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.21688283061839067, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.41775824162589076, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.12162779391619735, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.3228288840559658, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.18237599479708327, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.3740403511567824, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.12876689524369925, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.3253153379449275, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.119159749312327, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.21297942664093145, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.2036348471340078, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.3472831655579266, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.24362353508932386, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.28135849152758385, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.14482189302397735, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.2913876815877049, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.16306957103469613, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.9199349282509897, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.28112283847231073, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.1308613527030366, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.3063146286877558, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.12787395553510186, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.21931515993565381, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.1441966459257424, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.14957316612525498, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.27675048474641756, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.3780460244391623, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.12503614625842938, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.20624064341134082, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.3368893372278425, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.2961559727627133, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.12846497020051437, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.2670865602673704, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.28252374116432993, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.3549531183419122, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.26128489301072644, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.2126837065505244, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 10 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.07149097424598219, "sentence_nr": 10 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 10 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7774075575820374, "sentence_nr": 10 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8943538262827356, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.18639667871924825, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4540232715517938, "sentence_nr": 10 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.8431643718744966, "sentence_nr": 10 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9341410275694613, "sentence_nr": 10 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 10 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 10 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 10 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.47095916883357913, "sentence_nr": 10 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.701526330557871, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.38260294162784475, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6692418584049541, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4093629115744712, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6243156092220487, "sentence_nr": 10 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.36703839483583006, "sentence_nr": 10 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6725357332891145, "sentence_nr": 10 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4322450379367835, "sentence_nr": 10 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.697398762810304, "sentence_nr": 10 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.41122010762096617, "sentence_nr": 10 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6697492221087861, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.41126318495820946, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7254294465493162, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4682601513034942, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.691130012325589, "sentence_nr": 10 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.35334199245807973, "sentence_nr": 10 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6858610070406853, "sentence_nr": 10 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 10 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 10 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.33061666631099795, "sentence_nr": 10 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5343307680770133, "sentence_nr": 10 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.38981415389445495, "sentence_nr": 10 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.665622189515994, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.3223937524276847, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.6719135382778884, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.4466645979681496, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.714247354760266, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.6233091888805312, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.7757111039890131, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.33414322499224436, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.7159580680193959, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.6620694102966999, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.7893416551805176, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.26540383860058264, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.51610805930355, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.49335830881778164, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.7240615166053675, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.4024279293206815, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.6798070651801875, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.6153147385756811, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.8160952378322835, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.6838493012537611, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.8178509424142287, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.5169198985488462, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.7879691803533485, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.5223010192696725, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.7442134884509299, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.3885151883045163, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.6763151870864087, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.5985488590218004, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.8248561222494313, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.37163791993879014, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.6792432753943116, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.5152630372775983, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.7696821316655393, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.43521980294891405, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.7204319998551938, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.46417187236805535, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.6653227698984816, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.519124054532681, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.7733428788002137, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.5083170211670072, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.755952798269267, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.3161432307247198, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.5990810117425377, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.40980949787910764, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.7145653936496129, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.5770135999436572, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.7697316849447288, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.7030214416074754, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.8357829168322639, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.49199339399396913, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.713934780293142, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.5002824356846001, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.7029341279811726, "sentence_nr": 10 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.029124970213905314, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.1779610499753793, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.05989397907532586, "sentence_nr": 11 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.13539167567510446, "sentence_nr": 11 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.03073685498855941, "sentence_nr": 11 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.08933758530290428, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.21051269871304829, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.18854722085547196, "sentence_nr": 11 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.1387123733773652, "sentence_nr": 11 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 11 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 11 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.05499461839884487, "sentence_nr": 11 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.19978068293555115, "sentence_nr": 11 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.1388011701223677, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.1460389336009171, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.038796252164058714, "sentence_nr": 11 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.1756002877791377, "sentence_nr": 11 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0029868578255675027, "sentence_nr": 11 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.015380253532528225, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.19065171436703615, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.21083781655774478, "sentence_nr": 11 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.14590438247348272, "sentence_nr": 11 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 11 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 11 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.04379419293412465, "sentence_nr": 11 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.15119622228734425, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.21315318926996712, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.16991425356152365, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.22371589981083434, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.20982178138488494, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.20189358781069322, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.20261685251676126, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.226729844497646, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.18184342512086546, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.2185121523322681, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.17386106914161167, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.08272059515141832, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.1814025725787457, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.23945930551153607, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.20815933215961574, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.09886053260067004, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.14345644530149382, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.19097844728039898, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.08246021416977749, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.18868639139421345, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.20665565461558383, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.17764901410543646, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.19312651305380893, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.21371557282714232, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.18854043679878274, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.19559831357902827, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.1914895496057553, "sentence_nr": 11 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6666935927206881, "sentence_nr": 12 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7957561291403441, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.34999116613463505, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6356075517191035, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.48649824146709, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6763447333054696, "sentence_nr": 12 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.367622917844187, "sentence_nr": 12 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5615050712672139, "sentence_nr": 12 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4081538556642202, "sentence_nr": 12 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.46386216052527535, "sentence_nr": 12 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4300174433641992, "sentence_nr": 12 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5099800158255156, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7963205130973803, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8101688749569373, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6570128212612868, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6262090565616182, "sentence_nr": 12 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5866943184579982, "sentence_nr": 12 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6390393619950272, "sentence_nr": 12 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 12 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 12 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 12 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.01047222192173988, "sentence_nr": 12 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5683565265173782, "sentence_nr": 12 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7072367582469653, "sentence_nr": 12 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 12 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.20287366424876002, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5368464080033196, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5198707241967666, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6993305416237223, "sentence_nr": 12 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.36603776814499195, "sentence_nr": 12 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.45532918164901276, "sentence_nr": 12 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 12 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.13525036115537795, "sentence_nr": 12 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3120848453730729, "sentence_nr": 12 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3474347870952493, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.7073395735740273, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6577952971578602, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6570128212612868, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6221526807313811, "sentence_nr": 12 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5460462259563637, "sentence_nr": 12 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6641829079106271, "sentence_nr": 12 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 12 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.04884431803904408, "sentence_nr": 12 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 12 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.18357384275951122, "sentence_nr": 12 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.28073304156067924, "sentence_nr": 12 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.360657984953223, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.46365764298816153, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.5757521453586436, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.3147715014841853, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.5986154863155839, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.3885646234110734, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.5051669760132699, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.464413403675355, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.6291656356697347, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.30490938758882236, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.579088460457721, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.3758073513458154, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.5302950018189692, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.29308025637967977, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.5715200997140051, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.43285599641891276, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.5551678521355665, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.25984882476296983, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.6305744214119023, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.48649824146709, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.7255446918266525, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.464413403675355, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.6853183317800515, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.34999116613463505, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.6356075517191035, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.4426623526629488, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.612058732370435, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.5522004843736675, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.6166558670381421, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.37954187220913477, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.5550325994532472, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.3147715014841853, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.521228891025682, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.3964513253420688, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.6095420129111676, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.36033217429111203, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.5550014071110869, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.33403925633579773, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.5753930328058733, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.44882520213790794, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.5856175239899348, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.42760828727369016, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.6065010489098535, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.33403925633579773, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.5915394296427854, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.3212785834179169, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.6158121620368939, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.1751489536280261, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.378593296276962, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.3214110553053944, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.49232390716994445, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.479033905070678, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.5975149526416976, "sentence_nr": 12 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.13150403915662862, "sentence_nr": 13 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.21177549089429396, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1424915360855107, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.23985076149753726, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.13309638637723345, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.18696197122203645, "sentence_nr": 13 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.12256515595630638, "sentence_nr": 13 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.23303109995893123, "sentence_nr": 13 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1419886619859991, "sentence_nr": 13 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.24113733359485448, "sentence_nr": 13 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1324448705928064, "sentence_nr": 13 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.22863839042697148, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.12017886776600228, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.20794486026487116, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1164257728844972, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.19249901344360867, "sentence_nr": 13 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.12325384013681445, "sentence_nr": 13 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.1960232617116645, "sentence_nr": 13 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.12806473847444227, "sentence_nr": 13 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.20054688779645718, "sentence_nr": 13 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1345714227066951, "sentence_nr": 13 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.21078968525268058, "sentence_nr": 13 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1802615495980454, "sentence_nr": 13 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.19630112442374525, "sentence_nr": 13 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.8212614342207556, "sentence_nr": 13 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7876222308170935, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5888582552569348, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7876222308170935, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5888582552569348, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7876222308170935, "sentence_nr": 13 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6871546336787117, "sentence_nr": 13 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6871546336787117, "sentence_nr": 13 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.43550490048931545, "sentence_nr": 13 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6419345531187637, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.17539593635425982, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3139104155809725, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.39225487001250453, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5189967318357492, "sentence_nr": 13 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.12859070457371286, "sentence_nr": 13 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.22162336097079333, "sentence_nr": 13 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6871546336787117, "sentence_nr": 13 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6258765997974801, "sentence_nr": 13 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6680248455809015, "sentence_nr": 13 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6258765997974801, "sentence_nr": 13 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6722124517361844, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.17023327167529265, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.2521455524828544, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.13150403915662862, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.2229548791980166, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.15247670030930355, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.1324448705928064, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.23382021475411732, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.09766807787022613, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.16788063248730647, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.12111615182138995, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.21505717177216926, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.09979796185764318, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.1310501345458609, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.11512937599552589, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.1852451960926282, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.126642985054506, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.20913543330915318, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.12632059501697884, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.22490978846607526, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.1352612651586241, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.22176710342008016, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.18982400330057914, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.11760179026027952, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.19531596229980544, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.09968269909242322, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.14510210137368384, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.1204925245474865, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.12192273449574796, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.18177358407861108, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.16841504132177978, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.10667790151233097, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.17427579502643556, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.1508875367739971, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.20889434105456664, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.13184959768302618, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.30505662513933907, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.09878901581794378, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.1651800705978423, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.13150403915662862, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.20736628090200235, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.11824658049755846, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.2047497542808756, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.1461072488843534, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.1946917085815184, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.1018151014848322, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.14524830913329922, "sentence_nr": 13 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2589080403198245, "sentence_nr": 14 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2189767496390278, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.09761931247072746, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1397102655312677, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.1326689502117876, "sentence_nr": 14 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 14 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.167569694983793, "sentence_nr": 14 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.15848968577272604, "sentence_nr": 14 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.24447662789322752, "sentence_nr": 14 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.20665940380705064, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.18243716955007858, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.16168125580314086, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2450013599045987, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.20901732384345645, "sentence_nr": 14 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.20222677481313764, "sentence_nr": 14 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.18492694642397273, "sentence_nr": 14 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.18243716955007863, "sentence_nr": 14 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.16667457585564618, "sentence_nr": 14 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 14 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.08556679632324991, "sentence_nr": 14 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1575852366903021, "sentence_nr": 14 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.1474874322154398, "sentence_nr": 14 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 14 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9202663016973823, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9263876898254182, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.8621431910551439, "sentence_nr": 14 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8363304387269249, "sentence_nr": 14 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9419492177147062, "sentence_nr": 14 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9202237383102091, "sentence_nr": 14 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6656058483395763, "sentence_nr": 14 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6306557167105028, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.8657947138469048, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8367521498141209, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9419492177147062, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9202237383102091, "sentence_nr": 14 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6993348038140574, "sentence_nr": 14 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6335836519040372, "sentence_nr": 14 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 14 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 14 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 14 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9419492177147062, "sentence_nr": 14 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9202237383102091, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.11064738383914807, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.12449466772796605, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.12222372495044852, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.12383047729216191, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.1392580908972882, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.1333265070823728, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.07717159074475938, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.09413026539458375, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.16807498532991816, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.16404257857373192, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.21005284223037346, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.1679703861465872, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.0951509584925814, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.12014553061064691, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.11737915185320068, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.10085050674562507, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.11377195287577829, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.1301681094143453, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.09455636771034115, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.11463120929696417, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.1544787887603271, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.1384236976807813, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.11488572123868507, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.1455973492295447, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.13735441291745387, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.20255423961944058, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.205408273869532, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.11470196605012067, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.0960438892364715, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.07184436307032757, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.20378989148152887, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.16337212771611656, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.09669863605676213, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.10886215421099144, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.18171364159867548, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.16245793974098002, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.07562263205281951, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.09819928715831736, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.1430606569063152, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.128073928655324, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.09526781380423786, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.18223449608285797, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.17127401148639734, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.09855718610544388, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.09669863605676213, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.11679541132562438, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.09643517424337235, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.1226126790254367, "sentence_nr": 14 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3969253441303859, "sentence_nr": 15 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.43277080710930865, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.26887073704667247, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2918476164856665, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5183146371291372, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5942793492554739, "sentence_nr": 15 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.028864519535915668, "sentence_nr": 15 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.13535086012687783, "sentence_nr": 15 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.29687399422087424, "sentence_nr": 15 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.002376388269368755, "sentence_nr": 15 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.04574695485583133, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2703094106380642, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2982249908859, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.29313061087267483, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.30295384730328956, "sentence_nr": 15 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.284911205299835, "sentence_nr": 15 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.32067889250923776, "sentence_nr": 15 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.29353055611145706, "sentence_nr": 15 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3381266475327612, "sentence_nr": 15 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.09910529437987022, "sentence_nr": 15 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2511990291834263, "sentence_nr": 15 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.295394335805579, "sentence_nr": 15 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.362515947701148, "sentence_nr": 15 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.7243776840931383, "sentence_nr": 15 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8980107630353439, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9325718821645923, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9490053815176721, "sentence_nr": 15 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6653044831075519, "sentence_nr": 15 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7986980418662383, "sentence_nr": 15 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.8504591592783618, "sentence_nr": 15 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8980107630353439, "sentence_nr": 15 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5950322600507224, "sentence_nr": 15 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7090542316843602, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.44768974737795825, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.45520472994232203, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6509298345623671, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7962234681835563, "sentence_nr": 15 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.41813929088914065, "sentence_nr": 15 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4779008399806691, "sentence_nr": 15 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.7243776840931383, "sentence_nr": 15 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8642805496461259, "sentence_nr": 15 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9506885335787997, "sentence_nr": 15 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9606382935593174, "sentence_nr": 15 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.8665175293126633, "sentence_nr": 15 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8642805496461259, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.342569723746894, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.47156710056973744, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.2319934375578505, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.3367678538644817, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.2261681529206079, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.2647144854968396, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.35554722872430145, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.38873710544604445, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.3720000272862786, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.44695658930348453, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.4118588818865406, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.48573453292579605, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.2998354233286452, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.41144215385645566, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.42142495511264777, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.49708063531780444, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.33296735510279596, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.4176386300927819, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.32522259162581857, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.3572499606049779, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.3449668516380805, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.4341194278942322, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.36161896085795575, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.5052818563161547, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.2798191316489921, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.31866179281073254, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.3170440263520106, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.45327673850268096, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.15538140800156827, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.22365453282977818, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.1352815632479558, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.2610624350708668, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.35907597395908514, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.433310273977633, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.33498522957587384, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.4529680464694055, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.30675389390381064, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.49190118767827684, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.13922661372145656, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.27553494979330584, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.3515170550015674, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.37881852198491145, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.30950829536527374, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.3839157172568008, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.22141947821999777, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.3633108862011865, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.2957849631521743, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.2872269269040579, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.19474118932727338, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.3257294949902081, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.26505727008662233, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.41342120940573923, "sentence_nr": 15 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5489548889989204, "sentence_nr": 16 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5292552311493306, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.43141660874998483, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4251732952639193, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.479859141564773, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.47978767796651084, "sentence_nr": 16 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2751349202729036, "sentence_nr": 16 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.311148395820729, "sentence_nr": 16 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5435154526669127, "sentence_nr": 16 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5395341377171525, "sentence_nr": 16 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5777979902630328, "sentence_nr": 16 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6331337405946555, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6121338866063298, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6222767269627676, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5440627210252523, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5801365308278273, "sentence_nr": 16 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5097049681318312, "sentence_nr": 16 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5622473457673939, "sentence_nr": 16 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.42567378467735034, "sentence_nr": 16 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.470165978205223, "sentence_nr": 16 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.47594607773277786, "sentence_nr": 16 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5363851621507516, "sentence_nr": 16 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4533373633026252, "sentence_nr": 16 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5042718376547173, "sentence_nr": 16 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.8509306641805077, "sentence_nr": 16 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9162670716850285, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9682566771439106, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9779127328168863, "sentence_nr": 16 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.7040822331405046, "sentence_nr": 16 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7673268835807536, "sentence_nr": 16 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.8509306641805077, "sentence_nr": 16 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9162670716850285, "sentence_nr": 16 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.7639225615341296, "sentence_nr": 16 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8135226479972402, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6729400620282456, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7157738382386983, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6736973998414632, "sentence_nr": 16 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7157738382386983, "sentence_nr": 16 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.7640211005075139, "sentence_nr": 16 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8179683170395244, "sentence_nr": 16 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 16 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.8509306641805077, "sentence_nr": 16 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9162670716850285, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.43141660874998483, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.45005622460103567, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.5269212212163125, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.5528502361092263, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.6736973998414632, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.7157738382386983, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.33491174038847354, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.3646077683106875, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.450293182440332, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.4822292034174927, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.19834633509680927, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.2712763621688402, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.546749262754264, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.5830342194369027, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.2754139367364165, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.34665831783057166, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.42877544777223947, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.43803970127356867, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.43908893511874636, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.4785460996828672, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.5898466143484524, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.6611594562951559, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.44701416909786756, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.5245065297475329, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.31417347869916407, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.3530975487930333, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.6373258340947424, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.6437421244363288, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.4715455630189013, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.543275675805182, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.2807304798995431, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.3418543172008782, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.5397682182130759, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.5703951757357331, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.5446420954986508, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.5662782206307382, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.3378721588486122, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.4362453299175689, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.49288474585647657, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.5578180330951528, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.36197274748300795, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.36134314178088084, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.17060055774694924, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.2566677182784047, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.5717883675148524, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.640780099960748, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.221071468018936, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.41620491059292214, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.4263215396273059, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.3711481893609263, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.4101392170618868, "sentence_nr": 16 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7838756540325346, "sentence_nr": 17 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8813081534414112, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6486802664285581, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8066891982024211, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7344798528986015, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8855631322316195, "sentence_nr": 17 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6486802664285581, "sentence_nr": 17 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8585894188661937, "sentence_nr": 17 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7838756540325346, "sentence_nr": 17 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8813081534414112, "sentence_nr": 17 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.8434569599214109, "sentence_nr": 17 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9123500588239437, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7849324644314795, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8934780380564308, "sentence_nr": 17 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7838756540325346, "sentence_nr": 17 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8799941663695641, "sentence_nr": 17 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 17 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 17 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6809354000776107, "sentence_nr": 17 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8640242853252401, "sentence_nr": 17 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7838756540325346, "sentence_nr": 17 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8841725044915145, "sentence_nr": 17 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.39503194300684213, "sentence_nr": 17 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6916289318228928, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3094285625931604, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6328843883953666, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.30888995556875376, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6801864286113619, "sentence_nr": 17 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5512199399393973, "sentence_nr": 17 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.45862256824436665, "sentence_nr": 17 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7660160731572102, "sentence_nr": 17 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.47770079267358434, "sentence_nr": 17 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8053780976175922, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6259358824502687, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8067950339997761, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5296344689827603, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7183083787484315, "sentence_nr": 17 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.7568440125092788, "sentence_nr": 17 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8347576899702969, "sentence_nr": 17 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3001800600660342, "sentence_nr": 17 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6794930944968381, "sentence_nr": 17 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.18879642915927602, "sentence_nr": 17 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6584653291380502, "sentence_nr": 17 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4697979053121435, "sentence_nr": 17 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7554660353280213, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.3164389365959547, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.7121929522648841, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.6031798395521694, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.7819677495994619, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.5646631238098637, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.836206348617966, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.36615107686578496, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.696074520676609, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.1543252261021413, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.4932064977882042, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.6966863379186454, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.7941296295595748, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.5487584440377526, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.8692797308530646, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.8787142254774354, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.944457825946867, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.5463887965663883, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.7033378749149323, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.4912217876159168, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.7991339910300419, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.7251215108320924, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.8334871013677937, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.587725019570444, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.7957550794048827, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.28856268147560865, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.6187787024786685, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.4402122771181734, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.7716344099519011, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.18465966669442654, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.503938463452404, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.17973438065210462, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.5509051817440759, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.4809103179432793, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.7499547288317748, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.6244070585346295, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.8433626077474702, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.43660156107563336, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.7165816705519701, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.3748533897614559, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.6863935447402433, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.3607442374649342, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.6876955247522804, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.3718491333506089, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.6941552634040441, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.5110976370499285, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.842915559657988, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.5591535564944223, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.8079980831297509, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.11809858631445573, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.5943886568930294, "sentence_nr": 17 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1423170365140828, "sentence_nr": 18 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.38605131339325, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3230989128220882, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.13860487750886114, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.36659667376085786, "sentence_nr": 18 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.36295227908523897, "sentence_nr": 18 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.13860487750886114, "sentence_nr": 18 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.36118801210741663, "sentence_nr": 18 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.40877861250593944, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.16673024281943524, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3975048254243706, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.11262865194228103, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.36030161445252334, "sentence_nr": 18 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3066941236048102, "sentence_nr": 18 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 18 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 18 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.409404483413751, "sentence_nr": 18 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3629681915617596, "sentence_nr": 18 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.12162779391619735, "sentence_nr": 18 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4136500403395244, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.1909693288724605, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4115524982336727, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.14192760409508295, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3989311390496819, "sentence_nr": 18 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.20304460086424203, "sentence_nr": 18 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4966336271433132, "sentence_nr": 18 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3935462418730863, "sentence_nr": 18 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.33523829330170474, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3250861966671464, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3051626462022859, "sentence_nr": 18 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.30944349609311117, "sentence_nr": 18 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.11556522074454477, "sentence_nr": 18 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.372688132616477, "sentence_nr": 18 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.22392361812003433, "sentence_nr": 18 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.460938469666163, "sentence_nr": 18 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.10704943109718215, "sentence_nr": 18 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.362953271903766, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.14392660099814805, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.376362134090542, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.11718316363212337, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.3844506520287143, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.4024646900219184, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.08197539732074254, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.35287478964221025, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.3502198678697797, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.376636825008991, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.30372034137078635, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.21481172921264619, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.4009028477501074, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.15065778147399764, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.4580508275161034, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.21281360709834968, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.4292702902558381, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.13780534982274106, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.3273034480518148, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.36078900962911326, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.2491467453273127, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.47986445165634506, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.10905122148101043, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.4502571446121065, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.274959074733397, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.3607206140473947, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.17796237395371306, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.48209511527864385, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.13644487773607678, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.36491236604183974, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.21850577875478958, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.4494281444270959, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.31361999490423276, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.1222354265296326, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.3727252294250617, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.1109484758001971, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.3612426584883393, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.20356858406857398, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.46358366365120834, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.11530762783711283, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.3781690117672006, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.11907182322580316, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.49599003474365394, "sentence_nr": 18 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4220964985804286, "sentence_nr": 19 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4455062898838481, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.32026140564476524, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4016870075045671, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.34697616124581016, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.40373943351486685, "sentence_nr": 19 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 19 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4184617303786878, "sentence_nr": 19 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4321132548050678, "sentence_nr": 19 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3499900041521066, "sentence_nr": 19 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3822330369569219, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4220964985804286, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4455062898838481, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.41428013900466737, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.425713879206717, "sentence_nr": 19 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4184617303786878, "sentence_nr": 19 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4321132548050678, "sentence_nr": 19 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5102296603076779, "sentence_nr": 19 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5412065437629714, "sentence_nr": 19 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.32282559495424096, "sentence_nr": 19 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.38266426308756574, "sentence_nr": 19 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4230074457298372, "sentence_nr": 19 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4432451111759523, "sentence_nr": 19 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 19 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6363676859401174, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6744544901797789, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 19 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 19 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 19 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9271746317040298, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9736668125871423, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6986939462620247, "sentence_nr": 19 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7821077250864037, "sentence_nr": 19 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 19 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 19 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9184678024441792, "sentence_nr": 19 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8884834862973964, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.3797391466432489, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.3481158447116987, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.31102805827817165, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.3375837027261476, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.19710660977672484, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.2646181750020499, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.3797391466432489, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.3274816319655301, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.28493958837889694, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.35876163607595707, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.2485364833746714, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.2873862688213756, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.41664461891968263, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.42600414573009276, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.2710684964643971, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.2982841390442802, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.23005567239800093, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.29184715566281483, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.2741455993358603, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.36403543443534025, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.34279101776553306, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.42600414573009276, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.30955822779938535, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.39546682876478195, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.39475108115635776, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.42154888635191134, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.2781617026804374, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.32302333182207527, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.08473168573832755, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.25650903369815853, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.2883871807684295, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.21660761852515356, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.25414220830184964, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.32910644083871465, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.29306886812256966, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.18084108219203518, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.27583433958197495, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.25612947694888455, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.3002607987321696, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.3216291288446239, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.4272249853925079, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.188590266789637, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.26177705380820604, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.3308736026652116, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.3875427536757155, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.28432597056103653, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.35944124408933287, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.23631465024334478, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.2692006325646732, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.259615032947222, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.2855780701161316, "sentence_nr": 19 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.31343233007308363, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.28662182336952924, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.289946670354745, "sentence_nr": 20 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2585958231966256, "sentence_nr": 20 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1574562620502688, "sentence_nr": 20 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2833933092608246, "sentence_nr": 20 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2330649391612961, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2858508520944113, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.17248469309075373, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3673041887389201, "sentence_nr": 20 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.28838937143148047, "sentence_nr": 20 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 20 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 20 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.25480888745972646, "sentence_nr": 20 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.14839290005301392, "sentence_nr": 20 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.29565285341782266, "sentence_nr": 20 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.22266775943086, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.10508106635796587, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.2504422832248121, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.22563365567811913, "sentence_nr": 20 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.12913533075470382, "sentence_nr": 20 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.24776496881674256, "sentence_nr": 20 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.08680476715745516, "sentence_nr": 20 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.22066482174709295, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 20 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.12117880855911824, "sentence_nr": 20 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.32137825349405363, "sentence_nr": 20 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 20 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 20 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.20104685618767446, "sentence_nr": 20 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.25137213099939626, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.28372673673489807, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.164799256779143, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.32187376249458133, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.2969522070783606, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.14440617372843148, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.27200704330334224, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.2442053369522631, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.33050427873462274, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.2133219421911448, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.3424665224706109, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.8944054777319608, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.24197054442617688, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.21682999057776514, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.3722897460532404, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.24424323100599224, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.2205591704292585, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.3479467223515336, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.1926917267834754, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.4545444680350158, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.17580772500133016, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.32957763052496886, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.2148084015365523, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.40974307981059804, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.29622141199363383, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.24146688269469918, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.09958408398703665, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.22890983822248492, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.20795712301883962, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.282761705091657, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.2551114536415265, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.18112053860965763, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.3266298821510716, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.1423412184218882, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.26467729752192487, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.286072901441292, "sentence_nr": 20 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2851456053265138, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.09858834583812252, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7445389400758123, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9134769668037408, "sentence_nr": 21 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2506297252541463, "sentence_nr": 21 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 21 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.8320381765431424, "sentence_nr": 21 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9129044064886581, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.23443139907396643, "sentence_nr": 21 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 21 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 21 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 21 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 21 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 21 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.29972668857564216, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.12409597120849801, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.2928237514438983, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.15083364266523736, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.38662429787924074, "sentence_nr": 21 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.22849324967229787, "sentence_nr": 21 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.44152236347960977, "sentence_nr": 21 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.2989569143807341, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 21 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4042166909648807, "sentence_nr": 21 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.3423939053207622, "sentence_nr": 21 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.17611268473423294, "sentence_nr": 21 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.20441543914149457, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.18928624746011372, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.43639616127375797, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.29213008358451265, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.5828788445270403, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.14679869139754204, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.4021419566569229, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.329340597116918, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.6347143291802012, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.2868708266227936, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.5779499593492363, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.3436610762802303, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.2782087319667435, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.632418768195088, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.3083012995502152, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.6392851743718383, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.23050898626566632, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.48172150010681464, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.1969221590285716, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.5644899370701738, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.32594818888335836, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.49646222671189383, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.4604008032403599, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.7444026788985108, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.29161716271402766, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.47302621872495865, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.6854823532900025, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.3546725638586892, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.21468316165048362, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.6851126041819388, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.250737833894674, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.40017617077306594, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.27204995504877727, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.2743963944428051, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.6341922683775969, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.7252122374710612, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.12586347848916266, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.3554854950683664, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.3889045463729729, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.20229280648000492, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.6194717199605934, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.31114459650134146, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.11856660123276004, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.34601719602607445, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.40072710492884706, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.7206046648616748, "sentence_nr": 21 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.22174147515312165, "sentence_nr": 22 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2117279815687756, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.33999170096577974, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.29221353951377876, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3058731661111107, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2747352174231836, "sentence_nr": 22 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 22 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.42736771185803385, "sentence_nr": 22 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.39727964545172, "sentence_nr": 22 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.10975022749274138, "sentence_nr": 22 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.13904829787402162, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2873518361947954, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.24505805183333226, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.33495074569972355, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3454509072842772, "sentence_nr": 22 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.43090467385890824, "sentence_nr": 22 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3813511699401743, "sentence_nr": 22 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 22 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 22 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.22765977642995502, "sentence_nr": 22 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2247283208344801, "sentence_nr": 22 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.30931906627981315, "sentence_nr": 22 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2527893205238235, "sentence_nr": 22 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 22 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9210500207490827, "sentence_nr": 22 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9069369532463243, "sentence_nr": 22 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 22 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 22 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4607778969984477, "sentence_nr": 22 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8103868370118212, "sentence_nr": 22 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 22 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 22 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4885014761119101, "sentence_nr": 22 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.827819363745503, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.17903870455040152, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.18440575845606422, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.1981763713215807, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.2520139548059959, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.17499310607879404, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.18175908515502465, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.10089587713517954, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.11552870044063634, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.3168035112884022, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.30580678632835573, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.10825039887617824, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.1278708456868984, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.29705138694670025, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.2780223931578523, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.18986262747887736, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.18230825914917978, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.42442305789888696, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.42734795538422576, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.18781316135387768, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.16808430602651067, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.3454156644973841, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.30446460704247824, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.4479597674250984, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.41132840401983517, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.10704445941620296, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.13527356658034445, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.09941527806251362, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.13609735884978696, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.19230259308735756, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.22211286692050705, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.20383889880388334, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.17813562619757226, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.2986551380628858, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.30308773908860176, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.10536111661637193, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.13679626017050403, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.19732230687816163, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.22765162763479738, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.3987203877706927, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.39992851145514274, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.09467800236923245, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.11434380596647938, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.15034676904545285, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.36138016740101575, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.31224382417562974, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.345966570287759, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.2816115803298224, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.3461146475963348, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.30131374176129855, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.1552102601937674, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.1381803727119777, "sentence_nr": 22 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4967067363118649, "sentence_nr": 23 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6330776418175281, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.39501632817024007, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5629116515332234, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.44774758283371513, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6323151453499094, "sentence_nr": 23 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3353166764160673, "sentence_nr": 23 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5279751808070301, "sentence_nr": 23 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3340392563357978, "sentence_nr": 23 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5542299582982266, "sentence_nr": 23 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2288355034549531, "sentence_nr": 23 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.469883747317403, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5472915485853102, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7136367183558585, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6159995640523437, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8398584608765305, "sentence_nr": 23 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5155625728615272, "sentence_nr": 23 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6435263800797054, "sentence_nr": 23 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 23 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 23 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 23 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.32206162101132135, "sentence_nr": 23 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.24125880497129865, "sentence_nr": 23 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.47825499190432214, "sentence_nr": 23 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3292010361291119, "sentence_nr": 23 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5670300297444607, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.46086624699736534, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6510894943437193, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5511532346688224, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7550305399541021, "sentence_nr": 23 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.34537865578685034, "sentence_nr": 23 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5956718372193373, "sentence_nr": 23 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.39080227521872696, "sentence_nr": 23 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.621048393466749, "sentence_nr": 23 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.2755396296659942, "sentence_nr": 23 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5033588333252278, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5795086255869999, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7183582779188291, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6214211316495574, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7844755306149331, "sentence_nr": 23 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6008383045972477, "sentence_nr": 23 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7291842011448325, "sentence_nr": 23 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 23 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 23 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 23 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.25418196696822093, "sentence_nr": 23 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 23 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5117784549266909, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.274941620352113, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.4651004879148919, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.22743363869750483, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.5634710936922129, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.518836150464752, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.6242496691584447, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.5989032124636781, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.7291306908177887, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.2677353447271197, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.4460422364967209, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.3558785149067877, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.570837784052645, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.2624310277292268, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.4915471393606767, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.28489318277723963, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.6000278331909762, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.5728668995816387, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.7460634178179616, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.46086624699736534, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.6510894943437193, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.46507550803536196, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.6687857543858925, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.5155625728615272, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.6435263800797054, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.3639412530979476, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.654342605671994, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.19882981891203355, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.45714526865696425, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.32269274420690436, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.49704406859630557, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.42849655626964983, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.662646931303495, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.3937441173550755, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.5600824723479425, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.23114663823833642, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.5107406700140826, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.419793811546288, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.6152785242440109, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.46086624699736534, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.6510894943437193, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.46507550803536196, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.6687857543858925, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.2296660762967038, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.5259172094145851, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.39501632817024007, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.5505822266189535, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.3215000448278979, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.5947774549102596, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.20870371467330825, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.40726160697608454, "sentence_nr": 23 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3460579711860666, "sentence_nr": 24 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.43910565102067395, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.16692770661327389, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2940239540182693, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.17589867762235817, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2991014535844428, "sentence_nr": 24 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.15568794672327907, "sentence_nr": 24 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.30284457998681635, "sentence_nr": 24 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.17340302865304977, "sentence_nr": 24 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.28581037214602456, "sentence_nr": 24 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1782509297990519, "sentence_nr": 24 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.28710039249342334, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4901491669500622, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5638035394617603, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3460579711860666, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4260473803699743, "sentence_nr": 24 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.2011131382865372, "sentence_nr": 24 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.36314253622836745, "sentence_nr": 24 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.17598839092477797, "sentence_nr": 24 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.28650792027744043, "sentence_nr": 24 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 24 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.15997462319973554, "sentence_nr": 24 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.24731742205813823, "sentence_nr": 24 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3980108204104697, "sentence_nr": 24 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5611872124508993, "sentence_nr": 24 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7431443902355421, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4465866985385432, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6260699913485588, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4465866985385432, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6260699913485588, "sentence_nr": 24 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.25509991414681377, "sentence_nr": 24 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.505614827211273, "sentence_nr": 24 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.21452424426866915, "sentence_nr": 24 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.44780791445343104, "sentence_nr": 24 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.23857086413632697, "sentence_nr": 24 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.47971483823439903, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.446411600799131, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5816697577563045, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4664526119731094, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6399376431552989, "sentence_nr": 24 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.20156032858716424, "sentence_nr": 24 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4855075115512445, "sentence_nr": 24 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.1526900266679129, "sentence_nr": 24 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.41716995830580594, "sentence_nr": 24 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 24 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.23259933287371404, "sentence_nr": 24 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.20835831728362864, "sentence_nr": 24 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.49812931259693377, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.17334119484500185, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.31463785312250736, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.12522096513057643, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.335302418196347, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.9100527513271326, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.9584484214161733, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.20156032858716424, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.35007862377558696, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.3449632275226908, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.5000457205552167, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.15568794672327907, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.30284457998681635, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.1529699053146309, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.35702516223197556, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.15975615838102766, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.16928451900289662, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.40173762794247314, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.15568794672327907, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.30284457998681635, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.16038844415635037, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.30359085570641314, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.3595283251171754, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.5790446318474887, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.20563705341552085, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.3762774944524412, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.16692770661327389, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.2940239540182693, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.14165832410287266, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.24107149684266257, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.1258646065963102, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.24857006332411635, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.2519649154562495, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.44974180175388206, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.3253958243003269, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.45173371737296786, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.27618177741751665, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.4305107132988055, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.1683625745315614, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.31167225759119427, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.13728361101885644, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.3436250633828196, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.16353712933127018, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.32934735468962634, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.48680589893384085, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.6190257724123215, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.1551293035275564, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.2674082220133274, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.26091874007348304, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.17598839092477797, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.28650792027744043, "sentence_nr": 24 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1332399603607437, "sentence_nr": 25 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.19971937750838645, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1834283688193615, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.22588088032876846, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.12425342874478343, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.1660533764831914, "sentence_nr": 25 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.15538689193055893, "sentence_nr": 25 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.14158209035366248, "sentence_nr": 25 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.1869416235999822, "sentence_nr": 25 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0578819658044546, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.16170596160446446, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2206817446345091, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.14914968848461002, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.21702090583674813, "sentence_nr": 25 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.138685682297543, "sentence_nr": 25 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 25 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 25 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.1258687317121735, "sentence_nr": 25 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.1327332961698289, "sentence_nr": 25 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.23556366957615363, "sentence_nr": 25 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.22894370639738668, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.16684195647378827, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.21420692177337528, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.33150414660895594, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.30808679013173407, "sentence_nr": 25 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.23556366957615363, "sentence_nr": 25 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.25521078373566897, "sentence_nr": 25 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.14257880024595157, "sentence_nr": 25 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.1979524022915653, "sentence_nr": 25 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.14257880024595157, "sentence_nr": 25 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.1979524022915653, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.260711748598298, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.28143225165615565, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.25621420675166556, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.32613185963061736, "sentence_nr": 25 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.21310996044302127, "sentence_nr": 25 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.2620829676028965, "sentence_nr": 25 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.08892786873926031, "sentence_nr": 25 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.14069122234920528, "sentence_nr": 25 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.12273033502938982, "sentence_nr": 25 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.15070376710164984, "sentence_nr": 25 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.17376029392152273, "sentence_nr": 25 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.22421987263715565, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.07369293827420972, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.08728042965046878, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.12416744870990627, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.06452498627127952, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.09758509152849626, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.09985298970743903, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.22158794642706012, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.20787168962643957, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.05401240601013853, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.07243671671799473, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.1543646468773244, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.09348998462584433, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.130990604448226, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.09885362316286796, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.15900429623613993, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.10903227170832805, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.11481934989482791, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.1745453831609756, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.046916282267844764, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.1250076305588977, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.12985392271660248, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.06737080019124615, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.18629057860741663, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.1504281768235603, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.11099491388125307, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.1201070010200949, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.08702826664587757, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.42262353460370816, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.3966051357904673, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.09612004569821603, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.10249207815381514, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.1341907303110576, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.11635402454082566, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.1636348970852316, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.06028131279303415, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.0901676620993871, "sentence_nr": 25 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6983671476675032, "sentence_nr": 26 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6697193437120026, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5809024483660724, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5409616569206442, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5893051076561628, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.555242666304663, "sentence_nr": 26 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5197038614969076, "sentence_nr": 26 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4944106522194635, "sentence_nr": 26 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5863087308455573, "sentence_nr": 26 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.5756247354842696, "sentence_nr": 26 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.44763438063632005, "sentence_nr": 26 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4327706284829231, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4562933372999328, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.4354000091116894, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.650945489442927, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6025447507087655, "sentence_nr": 26 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5040260890269513, "sentence_nr": 26 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.48159079549233025, "sentence_nr": 26 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3966338449810425, "sentence_nr": 26 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.3940867714969907, "sentence_nr": 26 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3186669369694382, "sentence_nr": 26 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.34867169182256896, "sentence_nr": 26 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6350785093832516, "sentence_nr": 26 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6188888500556722, "sentence_nr": 26 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 26 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 26 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 26 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.7997394936755756, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7811228513409922, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.9660854289024723, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.9613867167137871, "sentence_nr": 26 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.7158159753911548, "sentence_nr": 26 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7127947486849641, "sentence_nr": 26 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6813410498464633, "sentence_nr": 26 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6671821168913319, "sentence_nr": 26 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 26 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 26 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.45066539224706753, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.4254592023616511, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.45779216736532874, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.40945502186629257, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.40071581088356767, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.36844216279073794, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.14609848125563302, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.18504017619904287, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.4184317523303411, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.40500270963162277, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.4125433652059801, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.3955923992862865, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.47182538941865537, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.42450279333172475, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.46492333059956836, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.4401112788616263, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.3967795858478363, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.3803134453035716, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.34915707707242977, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.34988691421168616, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.2613611691981996, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.2740054517113319, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.5600863252474344, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.5179797138258272, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.3461243385522883, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.3560268535895035, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.43650008892828823, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.42551924250056755, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.504580863725975, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.46703102558879955, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.36954961729302616, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.34760122558190465, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.3803026331533805, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.362200056491149, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.24777987943516128, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.2952194113831596, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.5258092834799059, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.4981801549352249, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.48625052891235754, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.4290939038872796, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.4045007320789693, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.4098113348256027, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.44158642009003995, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.40903259597127894, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.4946406341236379, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.4719975064311173, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.41182432358851845, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.4034715718148006, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.3693186725771347, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.36304188784855995, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.3692675983091899, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.37402683054534963, "sentence_nr": 26 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7645786047678913, "sentence_nr": 27 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8655501219338723, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.8643729226327672, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9215030582508996, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.8643729226327672, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9215030582508996, "sentence_nr": 27 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7858164289172753, "sentence_nr": 27 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8872272977237059, "sentence_nr": 27 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.8643729226327672, "sentence_nr": 27 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.9215030582508996, "sentence_nr": 27 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6898913050782208, "sentence_nr": 27 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8620687741940413, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6898913050782208, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8528837782425732, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7708719635370461, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8793197587693242, "sentence_nr": 27 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7708719635370461, "sentence_nr": 27 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.888538633093067, "sentence_nr": 27 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6840689169974626, "sentence_nr": 27 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8314419144081646, "sentence_nr": 27 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.5819799380263497, "sentence_nr": 27 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.7407958979814505, "sentence_nr": 27 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7645786047678913, "sentence_nr": 27 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8655501219338723, "sentence_nr": 27 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.41098733201100757, "sentence_nr": 27 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.651283133493195, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6152755816095169, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7669297251133314, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4250002996145258, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6670552714553488, "sentence_nr": 27 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3735617779670567, "sentence_nr": 27 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5773479111816255, "sentence_nr": 27 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5543498698280007, "sentence_nr": 27 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7266847297604082, "sentence_nr": 27 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3716332023564544, "sentence_nr": 27 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.6132388888021502, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6986939462620247, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.8497711598086016, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5072570733389083, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7124868368374351, "sentence_nr": 27 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5907596734005102, "sentence_nr": 27 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7837270250239556, "sentence_nr": 27 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.10008881112800158, "sentence_nr": 27 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.29125356488795046, "sentence_nr": 27 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.0, "sentence_nr": 27 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.041649157343430596, "sentence_nr": 27 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.6587480145435196, "sentence_nr": 27 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.7917841426705801, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.7446828000198126, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.885521980076414, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.6466833757622275, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.7737914417145209, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.4447278656331358, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.6742569711624775, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.600047216971444, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.7511423755179258, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.3382340617900419, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.6182585373365673, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.6069548573053054, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.7630436854704967, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.40482952759410495, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.6241130944295542, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.5021718181363274, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.697189669759932, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.7858164289172753, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.8717639062922423, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.5731680012014568, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.746935173521359, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.7224037170215811, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.8452672523905139, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.5724496367057007, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.7350859720106757, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.42250552136302394, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.6425389837629188, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.7645048342610411, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.876234192352485, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.45751787171307623, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.6647794363792763, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.7623067286250759, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.8682092620191191, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.46189821859121283, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.6442319235751083, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.3931991982536581, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.6422735790483707, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.44644290381704027, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.6892051604181435, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.4000177797533498, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.645169701736652, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.4479818542603719, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.6761961025641056, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.7123871749204508, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.8331784519293958, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.5749089871602278, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.7211428196508521, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.38506289173931413, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.6152360906748179, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.6231488481063673, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.7734960210241439, "sentence_nr": 27 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4460616097899727, "sentence_nr": 28 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6833569517560225, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4460616097899727, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6833569517560225, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4460616097899727, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.693261298341864, "sentence_nr": 28 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4460616097899727, "sentence_nr": 28 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.693261298341864, "sentence_nr": 28 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6745016003476486, "sentence_nr": 28 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8511670783317596, "sentence_nr": 28 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.32329508170352383, "sentence_nr": 28 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6141330847741713, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.4460616097899727, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6833569517560225, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.3837983925863447, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.6366757448341102, "sentence_nr": 28 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6745016003476486, "sentence_nr": 28 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8511670783317596, "sentence_nr": 28 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 28 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 28 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6667025833042813, "sentence_nr": 28 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.746973053424487, "sentence_nr": 28 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.6745016003476486, "sentence_nr": 28 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8511670783317596, "sentence_nr": 28 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.217554942150074, "sentence_nr": 28 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4859163400220353, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3008656294855478, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5198655773563042, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3008656294855478, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5198655773563042, "sentence_nr": 28 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.203264842568494, "sentence_nr": 28 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4965705242699611, "sentence_nr": 28 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.32079058840140134, "sentence_nr": 28 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5094305382960898, "sentence_nr": 28 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.23693055763743093, "sentence_nr": 28 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4968400811224627, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.32079058840140134, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5094305382960898, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.35479105265934485, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.4725761870926308, "sentence_nr": 28 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.3301899334885226, "sentence_nr": 28 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5632801217523468, "sentence_nr": 28 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 28 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 28 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.1923904871441659, "sentence_nr": 28 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5825915593253297, "sentence_nr": 28 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.32079058840140134, "sentence_nr": 28 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5094305382960898, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.4892199210635081, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.6263002679299042, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.09147827112247602, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.3360691966057836, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.2966218714191134, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.5348497180679597, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.3008656294855478, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.5209701084013916, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.3254074668234594, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.540582703782851, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.22935466869603194, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.6357138961264384, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.4460616097899727, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.6833569517560225, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.38769943713308697, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.6179897670313796, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.35964066074252593, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.5418421848087059, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.31666472263798334, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.5096984883597744, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.2656621439255861, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.47187800221660153, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.41583634222861793, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.6558319092753532, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.26633048164380024, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.5038200170930055, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.5371525807924681, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.7677378485184402, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.15274299622833287, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.4692950277268683, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.30626101600123445, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.583891679561264, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.18137691349228668, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.4586072719105437, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.23443677523946913, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.5163278972706644, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.33876931708826047, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.550413577565279, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.14207405313947058, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.47874702297210975, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.2539342198718324, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.46375067718601715, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.200726550812963, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.41645295439394076, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.42995245074388394, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.6515566568079457, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.2834052290575623, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.4974109921343301, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.19454290935168927, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.49909763892228687, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.3837983925863447, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.6379993550810827, "sentence_nr": 28 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.1481394578697113, "sentence_nr": 29 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.30063818852404856, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.14216645907653844, "sentence_nr": 29 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.2737034564138708, "sentence_nr": 29 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7778111223054219, "sentence_nr": 29 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8190064480412373, "sentence_nr": 29 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7778111223054219, "sentence_nr": 29 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8190064480412373, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7778111223054219, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8190064480412373, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.14939354788683526, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.29041654772860626, "sentence_nr": 29 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7778111223054219, "sentence_nr": 29 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8190064480412373, "sentence_nr": 29 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0, "sentence_nr": 29 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.0, "sentence_nr": 29 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 1.0, "sentence_nr": 29 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "bleu", "score": 0.7778111223054219, "sentence_nr": 29 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "translation", "metric": "chrf", "score": 0.8190064480412373, "sentence_nr": 29 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4125519163596689, "sentence_nr": 29 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5539867049403877, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5420662441541858, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5445089463670787, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.40919282596076484, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5542936932152527, "sentence_nr": 29 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4125519163596689, "sentence_nr": 29 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5539867049403877, "sentence_nr": 29 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4125519163596689, "sentence_nr": 29 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5539867049403877, "sentence_nr": 29 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5928902071159559, "sentence_nr": 29 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.647817438132439, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.5928902071159559, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.647817438132439, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4125519163596689, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5539867049403877, "sentence_nr": 29 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4125519163596689, "sentence_nr": 29 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5539867049403877, "sentence_nr": 29 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.34641959937802264, "sentence_nr": 29 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.47549559716182727, "sentence_nr": 29 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4125519163596689, "sentence_nr": 29 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5539867049403877, "sentence_nr": 29 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "bleu", "score": 0.4125519163596689, "sentence_nr": 29 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "translation", "metric": "chrf", "score": 0.5539867049403877, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "bleu", "score": 0.42461633178803443, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation", "metric": "chrf", "score": 0.5603699277937889, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "bleu", "score": 0.42254876310519374, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "translation", "metric": "chrf", "score": 0.5561399558171133, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "bleu", "score": 0.2340216139262901, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "translation", "metric": "chrf", "score": 0.4522093023662336, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "bleu", "score": 0.4132352454218328, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "translation", "metric": "chrf", "score": 0.5544725906870476, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "bleu", "score": 0.42254876310519374, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "translation", "metric": "chrf", "score": 0.5561399558171133, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "bleu", "score": 0.3951500216160541, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "translation", "metric": "chrf", "score": 0.6089660957340174, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "bleu", "score": 0.42254876310519374, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "translation", "metric": "chrf", "score": 0.5561399558171133, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "bleu", "score": 0.42282359171428024, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "translation", "metric": "chrf", "score": 0.5395092365663595, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "bleu", "score": 0.35412968165085734, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "translation", "metric": "chrf", "score": 0.4985795126785612, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "bleu", "score": 0.1598921499894403, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "translation", "metric": "chrf", "score": 0.390187618292215, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "bleu", "score": 0.2400540439585043, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "translation", "metric": "chrf", "score": 0.49297433772099697, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "bleu", "score": 0.4806604068305994, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "translation", "metric": "chrf", "score": 0.664228268001068, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "bleu", "score": 0.42254876310519374, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "translation", "metric": "chrf", "score": 0.5561399558171133, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "bleu", "score": 0.2340216139262901, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "translation", "metric": "chrf", "score": 0.45184273575809186, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "bleu", "score": 0.7778111223054219, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "translation", "metric": "chrf", "score": 0.8190064480412373, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "bleu", "score": 0.16533113836624475, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "translation", "metric": "chrf", "score": 0.4074791764578974, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "bleu", "score": 0.28547397706062927, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "translation", "metric": "chrf", "score": 0.4838477808123968, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "bleu", "score": 0.6053011982655683, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "translation", "metric": "chrf", "score": 0.652613765735072, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "bleu", "score": 0.4229247984636106, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "translation", "metric": "chrf", "score": 0.556465536088555, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "bleu", "score": 0.42254876310519374, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "translation", "metric": "chrf", "score": 0.5561399558171133, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "bleu", "score": 0.3471790743028735, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "translation", "metric": "chrf", "score": 0.4458106286047354, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "bleu", "score": 0.3555508425572384, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "translation", "metric": "chrf", "score": 0.5387745992013905, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "bleu", "score": 0.1709686260975486, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "translation", "metric": "chrf", "score": 0.3940091304204109, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "bleu", "score": 0.25958657290343434, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "translation", "metric": "chrf", "score": 0.43162699627918094, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "bleu", "score": 0.2213908395073965, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "translation", "metric": "chrf", "score": 0.4213527844474163, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "bleu", "score": 0.39696685122270786, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "translation", "metric": "chrf", "score": 0.5497060467823045, "sentence_nr": 29 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 3 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 3 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 5 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 6 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 7 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 7 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 10 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 11 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 12 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 12 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 12 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 12 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 12 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 12 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 13 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 13 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 14 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 14 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 14 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 15 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 16 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 16 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 16 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 16 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 16 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 17 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 17 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 17 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 18 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 18 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 18 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 18 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 19 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 20 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 20 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 20 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 20 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 20 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 21 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 22 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 23 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 23 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 23 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 24 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 24 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 24 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 25 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 26 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 26 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 26 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 26 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 26 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 27 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 27 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 27 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 27 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 27 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 27 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 28 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 28 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 28 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "classification", "metric": "accuracy", "score": 1, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "classification", "metric": "accuracy", "score": 0, "sentence_nr": 29 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9411583614202783, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9389202454786235, "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8775848642818888, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8618703443763697, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7861888156926622, "sentence_nr": 0 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7987489460131649, "sentence_nr": 0 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9319748402595084, "sentence_nr": 0 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7613425680699503, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8493237569441244, "sentence_nr": 0 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9020031517329425, "sentence_nr": 0 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8493237569441244, "sentence_nr": 0 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.883570112979728, "sentence_nr": 0 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8263460336753243, "sentence_nr": 0 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.8060322164809728, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8980680846396624, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9491059403137463, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9664300701360793, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9457224261353452, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9708225134054753, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9419324607589119, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9619002332717353, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9189927159116271, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.895905738615658, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.8719916488298841, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9397108105925289, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.884345665982421, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9584454525436005, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9643081480127652, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9067144042813564, "sentence_nr": 1 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8781616442886918, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9745733081082687, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9237743711831492, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9659571253320222, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9044755244774213, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9016506657203592, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9259203238585231, "sentence_nr": 1 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9226314544302758, "sentence_nr": 1 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.6237003645369218, "sentence_nr": 1 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.919365977563579, "sentence_nr": 1 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9113270242697518, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.898943894327586, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9736119227904283, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9415432301630186, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.973004167300919, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.9617726716367615, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.8788632576179716, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9442690941930104, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9167527970009353, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9264966822048945, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9760432643638268, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9290639912797567, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9451284616565533, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9571970948049097, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9428452278208271, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.924510998540744, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9354255661287414, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.9038448099971822, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.9290214610132344, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9359307328554756, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9462257677914746, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9685511109758306, "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9466350739636148, "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.7785501063601203, "sentence_nr": 2 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8677672451180615, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9519685270619841, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.5524309559543085, "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8979970994003059, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8979970994003059, "sentence_nr": 2 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9598023304313453, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8678877090803476, "sentence_nr": 2 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.3628854370408249, "sentence_nr": 2 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8866932684030095, "sentence_nr": 2 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7932574787392968, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.8840632918991035, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9244224424282228, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.7493760739956499, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9434070582654602, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8626111481890223, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9742381587466754, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9614829239512629, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9634058264556766, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.846746937646691, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9416090102549223, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9586487245465463, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.8628736669093499, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.8883148663773122, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.921000444185013, "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.964284245003951, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.899852954654377, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.5884852453065169, "sentence_nr": 3 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8943359440390058, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.6239646156236577, "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8782485779028959, "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9219735185328113, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8379214027434272, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9171135147465285, "sentence_nr": 3 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8793006100154936, "sentence_nr": 3 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.6764135013792538, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8320911917964368, "sentence_nr": 3 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8320911917964368, "sentence_nr": 3 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 3 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9020259333664543, "sentence_nr": 3 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8443316591536836, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9062739514559724, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9243814194896306, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9257122714800141, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9048929676970495, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9233238051356927, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.8961117810241208, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9137011072166213, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9144918070375806, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9447475462972004, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9418568225974095, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.8631885674989124, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9540570534869818, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9356691952085903, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.8263666332486633, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9187937618702817, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.6492261286778312, "sentence_nr": 4 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.4782990117524071, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8728890059382535, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7924841060781368, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8728890059382535, "sentence_nr": 4 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8085699807438939, "sentence_nr": 4 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9309167160514913, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.8335210974928002, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.9407617520385465, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9009704508776215, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.886161550229872, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.8864780713525466, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.8619950335517561, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.877644990158928, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9473578431592224, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.8989284887461744, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.8982857165205713, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9421743042333945, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.909430339396572, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9114715597392106, "sentence_nr": 5 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9221676855227006, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.903310364652346, "sentence_nr": 5 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.43631872104818037, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.725100223395414, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8342041754812477, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7541096773855238, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9352893606252747, "sentence_nr": 5 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7137044016250488, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8459329201101423, "sentence_nr": 5 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9155785169978052, "sentence_nr": 5 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.454243405917021, "sentence_nr": 5 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.4367071875067552, "sentence_nr": 5 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9053865214400596, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9344907300105301, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.941467473244312, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.8759462570863868, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9116059567890715, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.95453015576562, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9271804273091313, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9494380676747487, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.8907525765155897, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9420326057327402, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.8729192735278123, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.840210783941434, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.8830406923187026, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.8705872791986208, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9129896861855028, "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9775140091004713, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.931908394385036, "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.958499216692883, "sentence_nr": 6 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9787648208394673, "sentence_nr": 6 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8848447424869419, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9476480635849643, "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8420296194650692, "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9645398026978572, "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.976975965491712, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9575751193892209, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.917870378110458, "sentence_nr": 6 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.917870378110458, "sentence_nr": 6 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.922108923148009, "sentence_nr": 6 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9367021384173281, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.917870378110458, "sentence_nr": 6 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9136709169732016, "sentence_nr": 6 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9031487241080103, "sentence_nr": 6 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.922108923148009, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9717329164232313, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9810420842974353, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9296061535584738, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9548717794727779, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.9723617284409432, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.9433216405879152, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9253992588631311, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.7833761650543694, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.8958698547783525, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9659983030155975, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9368374793769542, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9292848975349729, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9705333075369675, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9560908971572966, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9288860917142431, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9402643484548583, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9303023646781129, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.9076656012518489, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.9272618174968876, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9630829363546703, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9437691960187881, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9233897890679653, "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9217593594034571, "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9429459010031568, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9680340601535599, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9444947592571505, "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9524237679532525, "sentence_nr": 7 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8521740000505951, "sentence_nr": 7 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9147273981117778, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9353915284262971, "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9575256886848735, "sentence_nr": 7 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9544425909905248, "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.887089742205764, "sentence_nr": 7 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8937272463225717, "sentence_nr": 7 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9221577416896909, "sentence_nr": 7 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7360571605491374, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9424882191492142, "sentence_nr": 7 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9158962896380519, "sentence_nr": 7 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9016185053131788, "sentence_nr": 7 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9519313199322048, "sentence_nr": 7 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9002497361613263, "sentence_nr": 7 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9424882191492142, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9404564646985731, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9525612663771642, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9416090102549223, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.946182450185975, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.8827665860178672, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9242269657430007, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9209375409360453, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9453162319718537, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9354735336178899, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9650606723493668, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.937172702008466, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9037456319061896, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9527540439558733, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9777992945719618, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9483614149601093, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9630476322301069, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9090634311284931, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9592439701684463, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.9352813563171796, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.9578898822826803, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9349087092124988, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9553475775967099, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9426144990998162, "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9455357310467346, "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9950087915805451, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9950087915805451, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9359599516797827, "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9950087915805451, "sentence_nr": 8 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8529883661830301, "sentence_nr": 8 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9313047211019367, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9311406569876187, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9392038901097501, "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9504743930445531, "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9950087915805451, "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9283998656503502, "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9934034758807603, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9131528589305679, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9917679206284817, "sentence_nr": 8 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9566767123929576, "sentence_nr": 8 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9359924521743563, "sentence_nr": 8 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8893588081911743, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 8 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 8 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9583698738001583, "sentence_nr": 8 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 8 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 8 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9934034758807603, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.946392812169666, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.911875333930421, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9169315433407361, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9541325707307038, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.9648123726963476, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.8370298547932784, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9334875203861144, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9413496332501932, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9956823103485622, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9457390517164731, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9671298665063969, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9336521523423332, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9502062892893858, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9333019767772176, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9037394051488277, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9283644587512466, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9237582925385585, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.8995566191566017, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.896344147038989, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.09821094254330615, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9548273305811203, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9251737690567995, "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9275689564213165, "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9272442008199501, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9520060001290835, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9058859200742604, "sentence_nr": 9 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8789724147701462, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9137645544850267, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8969027357279203, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9520060001290835, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9275374047069039, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8772309014828462, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9140052999897977, "sentence_nr": 9 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.868350408637765, "sentence_nr": 9 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7563541659131354, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8578315979157695, "sentence_nr": 9 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8441075622700097, "sentence_nr": 9 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.23829288001976573, "sentence_nr": 9 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9407267756704489, "sentence_nr": 9 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.831845583109951, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9530684796567226, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.8984174935165463, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.946008414943598, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.9285885624039975, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.9645189965938258, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9601667560566091, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9113133701465544, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9363094557613988, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9499594621802195, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.8850558582872771, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9413520522974334, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.8953760832780698, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9516191368774216, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.873135905690596, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.9331628274049639, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9350921637704382, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9398175409358328, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9675093986501344, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9282207391671503, "sentence_nr": 10 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.957452925924953, "sentence_nr": 10 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8937237551170429, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 10 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9256331955884847, "sentence_nr": 10 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.904390835311888, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8995954000535624, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.928962868887516, "sentence_nr": 10 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9339798045072082, "sentence_nr": 10 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8008809042180175, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9240001424211951, "sentence_nr": 10 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.3493344613894351, "sentence_nr": 10 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.92829327413418, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9359271530286619, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9641555435524619, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.90719289051837, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8543701176038877, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9045960456690756, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9576659929734302, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9445842802137389, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.917893569547509, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9031282594956593, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9325823323160847, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9171277146973622, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9631220314707449, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9125575210703364, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9671298665063969, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.8583796678495444, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.917870378110458, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9075511178990168, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.8942877287874674, "sentence_nr": 10 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 10 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8881782096383685, "sentence_nr": 11 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8452994228892592, "sentence_nr": 11 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.857664755026069, "sentence_nr": 11 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7687402404428638, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9425182378610694, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8221659843346086, "sentence_nr": 11 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8685375697135141, "sentence_nr": 11 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7860944644568774, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7829829019188287, "sentence_nr": 11 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9425182378610694, "sentence_nr": 11 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.289269703803095, "sentence_nr": 11 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7829829019188287, "sentence_nr": 11 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9425182378610694, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.915813486906383, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.928671169616198, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.9195852720074569, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9260563505342738, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8580715674095071, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.8991782906832555, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9549429726485847, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.8571447284090962, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.953599772014362, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9382091007325469, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9125682774652475, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9084959093441131, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9389584881035126, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.8968120926569282, "sentence_nr": 11 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.8757339860702672, "sentence_nr": 11 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9742989957563788, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9854564066904739, "sentence_nr": 12 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.938338375356983, "sentence_nr": 12 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9363458435045497, "sentence_nr": 12 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9275189832478317, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9680610688075657, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9458276502828801, "sentence_nr": 12 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9555270393882619, "sentence_nr": 12 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.774972667720128, "sentence_nr": 12 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9238483556315539, "sentence_nr": 12 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 12 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9292605756517186, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8710905917506855, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8877998658561537, "sentence_nr": 12 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9408832971568818, "sentence_nr": 12 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8758560882945217, "sentence_nr": 12 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9047504210526172, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9149458726191051, "sentence_nr": 12 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9497380252636716, "sentence_nr": 12 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9247145535687903, "sentence_nr": 12 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 12 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8829314518141973, "sentence_nr": 12 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9497380252636716, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9369900232316837, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9584772514045287, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9656526051593539, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9262800142753679, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.9178799098053634, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8988056403515298, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9240902217687106, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9454713149117651, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9457650793019858, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9115531547253959, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9403725471773088, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9107758326980321, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9251111872988325, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9267004903727016, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9652440580136615, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.924254800539438, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9054967244578502, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.886673201587762, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.9082204179924286, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9665046359304257, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9492870842156111, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 12 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9495327576081029, "sentence_nr": 12 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9605742681789634, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9410712595774171, "sentence_nr": 13 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.971921146040729, "sentence_nr": 13 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8360964435901039, "sentence_nr": 13 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9278436686065653, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9540941235545723, "sentence_nr": 13 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.7765803419515074, "sentence_nr": 13 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9368660209060221, "sentence_nr": 13 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9179315685239186, "sentence_nr": 13 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9198867501155861, "sentence_nr": 13 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9357668560693397, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.880651835588671, "sentence_nr": 13 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9322025130978147, "sentence_nr": 13 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8666701669384438, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9301584319196643, "sentence_nr": 13 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9201441893603447, "sentence_nr": 13 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.4518476286184633, "sentence_nr": 13 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8817151383770689, "sentence_nr": 13 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9719892276800867, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9232252378020026, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.90340499273861, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.9445601279006905, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.9284637794790105, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9506720475284802, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9650672132857259, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.935825271074837, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9417006532894496, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9180957642017807, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9336273124319283, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9199623581249377, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9420383150390214, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9169222881606529, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9358954768171188, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.9210475526688618, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.900422383617428, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9665042848270522, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9195975724156285, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9482591669689567, "sentence_nr": 13 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.894400898846725, "sentence_nr": 13 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9683895601588671, "sentence_nr": 14 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.974733551222386, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 14 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.935724475087967, "sentence_nr": 14 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.945278116491169, "sentence_nr": 14 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.913976993531483, "sentence_nr": 14 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9206503738833902, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8879551150411227, "sentence_nr": 14 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9093507960484853, "sentence_nr": 14 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.44325871778061554, "sentence_nr": 14 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8629899790604912, "sentence_nr": 14 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8839868610728687, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9659019608247615, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9612040783142544, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9355702448711621, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.8575724679460186, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.919154316989783, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9107041155041439, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.8860042875765471, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9163443895096822, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9513360683724416, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9506442510575418, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9210869399305139, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.8602965545640948, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.8912610518101419, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.857937519719319, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9528771181894694, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9241995664234885, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9432104991415542, "sentence_nr": 14 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.8927784164557715, "sentence_nr": 14 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8681309346882299, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9045257596276787, "sentence_nr": 15 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.7552111299277484, "sentence_nr": 15 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.82396628763246, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 15 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8809116426093319, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9069369532463243, "sentence_nr": 15 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8255413975339149, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9116712045344968, "sentence_nr": 15 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8855094439275503, "sentence_nr": 15 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8809116426093319, "sentence_nr": 15 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.5085021700346579, "sentence_nr": 15 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8155954216287978, "sentence_nr": 15 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8849766832597384, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9321985099431636, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9158869153954171, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.8940299169999223, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9029209331114941, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.9434784706316768, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9504499063681887, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8103402263404181, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9033542015144801, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.8920851535963175, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9012698346023688, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.8815241253287673, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.955434974676454, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9190034267575142, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9028341607528202, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.7933760889502307, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9669111778196173, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9183552099282611, "sentence_nr": 15 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9213964969470535, "sentence_nr": 15 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9438561056375272, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9245427558640842, "sentence_nr": 16 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9466217999433078, "sentence_nr": 16 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8600910973378976, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.5465479162881712, "sentence_nr": 16 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.908088143295894, "sentence_nr": 16 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8689979953554426, "sentence_nr": 16 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8837997874830685, "sentence_nr": 16 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9430526976186369, "sentence_nr": 16 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7213258253735133, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8583796678495444, "sentence_nr": 16 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.5352913894873965, "sentence_nr": 16 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7506613813658406, "sentence_nr": 16 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9120029292560927, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.969258616291086, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9359933426460225, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.8446197069920836, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.9665537794677691, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.7519024768911576, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9419599049218603, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9100379761498075, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9243062555931161, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9232535952320629, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9430158926147498, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.8923268998495886, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9316958873367511, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9441083273271286, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.899852954654377, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9451690574618664, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9470556595464068, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.8625414653847894, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.8658510104009289, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.938651167013012, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9362303281043904, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 16 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9288883358178652, "sentence_nr": 16 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.7378741057437793, "sentence_nr": 17 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.898904151376881, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 17 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8446522700991944, "sentence_nr": 17 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9053865214400596, "sentence_nr": 17 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8700885813654318, "sentence_nr": 17 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9331139325257429, "sentence_nr": 17 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8860497305091617, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8222704990602537, "sentence_nr": 17 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8402559609277754, "sentence_nr": 17 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7386088026745246, "sentence_nr": 17 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.06557474419143802, "sentence_nr": 17 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8700885813654318, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.8771568927591851, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.8869070241487921, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.8173012945645394, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.8220012279932035, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.8449397341788647, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9239069749524619, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.8972504357155736, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.6602446784708298, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.8667833154965509, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.7306831212016971, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.7306831212016971, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.7406377967705062, "sentence_nr": 17 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.8509760908759664, "sentence_nr": 17 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.96926930549605, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8641726957145408, "sentence_nr": 18 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9637804258017773, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9240863542577373, "sentence_nr": 18 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9450374119495017, "sentence_nr": 18 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.96926930549605, "sentence_nr": 18 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9424882191492142, "sentence_nr": 18 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9424882191492142, "sentence_nr": 18 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9392663489644577, "sentence_nr": 18 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8389799674466019, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9253208187778743, "sentence_nr": 18 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9221577416896909, "sentence_nr": 18 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9221577416896909, "sentence_nr": 18 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 18 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9144266092886102, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9500117624130617, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.905862662289465, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9003734503251455, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.858544407149412, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9281598514152588, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.948121913854874, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9629589146416885, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9580736862318411, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9708835294542548, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9234823141384267, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9939521304203686, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9474838221026617, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9488355997601815, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9424390135303181, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9711070259637357, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9237920416869381, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.8942780008373756, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.8468261925085733, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.8976119317111001, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9527352893094178, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9510981354135275, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9521144628004171, "sentence_nr": 18 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 18 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9774592733638915, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9665042848270522, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9806060444395596, "sentence_nr": 19 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9049668032095894, "sentence_nr": 19 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9665042848270522, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9667317239059525, "sentence_nr": 19 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 19 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9159800198090925, "sentence_nr": 19 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9667317239059525, "sentence_nr": 19 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8925738398388144, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9058585844143391, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8888787903169728, "sentence_nr": 19 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8925738398388144, "sentence_nr": 19 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9128855680689195, "sentence_nr": 19 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9272821491047395, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9413354408985303, "sentence_nr": 19 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.46619006556188114, "sentence_nr": 19 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.913896513382741, "sentence_nr": 19 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9058585844143391, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9557922260754473, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9391656780027514, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9260113686541587, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9419307613884336, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.9845996986850503, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9255228522887315, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.889174440461237, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9496761617043387, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9322360743819351, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.935492418630274, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9456325305487512, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9325466173278317, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9240800356922247, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9361690788124847, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.938043640398588, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.901373116210745, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9531605377803356, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.9132591460407243, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9494481589794223, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9415361564397403, "sentence_nr": 19 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.897450557161678, "sentence_nr": 19 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.937002127196651, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9435408381256087, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9421449698305296, "sentence_nr": 20 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9607456319189528, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.5467617051776391, "sentence_nr": 20 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8969209805167669, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9060555921929084, "sentence_nr": 20 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8969209805167669, "sentence_nr": 20 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9096430262961498, "sentence_nr": 20 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7983940190154283, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9132591460407243, "sentence_nr": 20 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9204057102575467, "sentence_nr": 20 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.4968312722246179, "sentence_nr": 20 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8001971757912975, "sentence_nr": 20 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9204057102575467, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.95112146871187, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.949624286506194, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9588139991437585, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9236414681715879, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.9453633691396565, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.9278367059866518, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9302237306555959, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8441460025255829, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9470556595464068, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.951863030034636, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.8944443568631728, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9082204179924286, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.90717359411325, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9014597856352894, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9250084453288043, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.95462554022758, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9318340131711181, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9736147802901586, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.9182449217144187, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.9400180064454685, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9255769217104873, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9309426923102619, "sentence_nr": 20 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9002012094811458, "sentence_nr": 20 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9690017425712892, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.6924365679057801, "sentence_nr": 21 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.804543317337012, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8385395593542468, "sentence_nr": 21 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9515560914045473, "sentence_nr": 21 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.854435717190483, "sentence_nr": 21 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7570244995532351, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.6676892344393273, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.873135905690596, "sentence_nr": 21 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.6885773376269438, "sentence_nr": 21 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.46961217063286037, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8379214027434272, "sentence_nr": 21 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7570244995532351, "sentence_nr": 21 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.20981645725460496, "sentence_nr": 21 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.6659995521111991, "sentence_nr": 21 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7570244995532351, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.8388678282825207, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9180596829241628, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9166274634412449, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.8626786769008709, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.7991709881281639, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8119656541607598, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.8872308158649556, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.8914910756561332, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.927494511055529, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9528614248210486, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.8523282278495175, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9297633204435644, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9278042759794851, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.8998995790099074, "sentence_nr": 21 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9302677881301988, "sentence_nr": 21 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9522511234396616, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.7585159184184324, "sentence_nr": 22 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8922770448230282, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9126128133576369, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.6245412677586388, "sentence_nr": 22 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.868233862673363, "sentence_nr": 22 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8852329532489643, "sentence_nr": 22 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8378994642516495, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8775848642818888, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8493237569441244, "sentence_nr": 22 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9448292727000915, "sentence_nr": 22 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8555426729178464, "sentence_nr": 22 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7558344174949267, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8497451239178159, "sentence_nr": 22 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8493237569441244, "sentence_nr": 22 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8199763712080639, "sentence_nr": 22 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 22 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8206722459046871, "sentence_nr": 22 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.883570112979728, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.914786293186172, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.8845568645036501, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.8937192042814042, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.901348698020278, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.8382013802825361, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9219786709510569, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.8767649499531999, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9094880423990607, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.8719390074611821, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9349020382990011, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9272997117562144, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.8962185446474815, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.8930034245249151, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.9271664513693498, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.8936606750264663, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.8803360259381345, "sentence_nr": 22 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.8680210960657176, "sentence_nr": 22 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9699436870249787, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9699436870249787, "sentence_nr": 23 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.7931982206364059, "sentence_nr": 23 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9699436870249787, "sentence_nr": 23 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9438398456065387, "sentence_nr": 23 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9281186022380125, "sentence_nr": 23 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9699436870249787, "sentence_nr": 23 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9184823166209557, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8839868610728687, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8884834862973964, "sentence_nr": 23 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9300073119656489, "sentence_nr": 23 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9381606131991436, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8839868610728687, "sentence_nr": 23 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9300073119656489, "sentence_nr": 23 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.4072337657555589, "sentence_nr": 23 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9184823166209557, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9293646790023864, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9511392272878579, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9293879632586071, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9277950353049101, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.8843378183459343, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8741633139531418, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9271525909282003, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9736840552120738, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9396084767892234, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9187563342696414, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.8896752045577786, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9737097349915758, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9330058893011377, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9570066548501687, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9201684039669155, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.9133901345922595, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9458636432813123, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.917857433142856, "sentence_nr": 23 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9375412439691305, "sentence_nr": 23 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9556267474396976, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9489054429933926, "sentence_nr": 24 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9489054429933926, "sentence_nr": 24 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8327628422929998, "sentence_nr": 24 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9249365863966041, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.922528755167094, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9486938895906879, "sentence_nr": 24 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 24 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 24 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8620685016584069, "sentence_nr": 24 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9405916043682414, "sentence_nr": 24 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9327915990783561, "sentence_nr": 24 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.909738029095061, "sentence_nr": 24 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.909738029095061, "sentence_nr": 24 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8707492337114523, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.95883735444933, "sentence_nr": 24 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9455007606735264, "sentence_nr": 24 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9372630850025364, "sentence_nr": 24 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.5983897920478856, "sentence_nr": 24 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9299762198228243, "sentence_nr": 24 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9302303599426779, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9544609413449265, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9355306533611718, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.9432457481338326, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9472285181144658, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.923828763793418, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9224761498105726, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9756278595118478, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9499594621802195, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9544238060448419, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9012364553153411, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.8199585012210312, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9280048312907723, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9587462450914201, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.8938919301593574, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9507758066685948, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9432005035367906, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9675203656708941, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.9303385434730891, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9401106918306472, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9533532275954528, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9274629860503822, "sentence_nr": 24 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.8926908826740254, "sentence_nr": 24 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.6224897798032885, "sentence_nr": 25 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.7893575827661004, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 25 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9478696521177714, "sentence_nr": 25 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7160421907140165, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.6217685026572488, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.794919886900137, "sentence_nr": 25 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8793006100154936, "sentence_nr": 25 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.5916523997385489, "sentence_nr": 25 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.4849269488253923, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7462718113811923, "sentence_nr": 25 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8083701726292805, "sentence_nr": 25 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.722502153449955, "sentence_nr": 25 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 25 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.5544920599877754, "sentence_nr": 25 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.6853792233736985, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9200538056807258, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9630774769374594, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9143443086107108, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9052744049140443, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.9018850910676268, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9142574363760879, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9168431011517528, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9141901633008906, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9317477810881586, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9354759108346813, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9141453314674155, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9550191440621234, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.8402328635525613, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.831845583109951, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.9092382099397807, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9770044719642067, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9155318202784664, "sentence_nr": 25 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 25 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8272309965382391, "sentence_nr": 26 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.7767725512278205, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9373981486656514, "sentence_nr": 26 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9579023880929557, "sentence_nr": 26 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9268329536813669, "sentence_nr": 26 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.904428807825769, "sentence_nr": 26 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9116613044583819, "sentence_nr": 26 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9084279839455062, "sentence_nr": 26 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8013174743750245, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.34811585804131506, "sentence_nr": 26 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8912610518101419, "sentence_nr": 26 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9084279608664247, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9342971539350323, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9618018909441389, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9221850850049388, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9621502301102783, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9207497282487874, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.8817316559043479, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9595521389704431, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9604273088099046, "sentence_nr": 26 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.8832167531630292, "sentence_nr": 26 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9618018909441389, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9338423795983638, "sentence_nr": 27 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8069582822584229, "sentence_nr": 27 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9432051372011929, "sentence_nr": 27 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8875472267363329, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8538919155402751, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8601111478550084, "sentence_nr": 27 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8274840531521687, "sentence_nr": 27 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8737243337458652, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8875472267363329, "sentence_nr": 27 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8213297311895551, "sentence_nr": 27 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.3007622907436899, "sentence_nr": 27 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.906379768806771, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.8996352283472103, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.8577239523880982, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9705288278234159, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.9022302698191352, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.9618116705103616, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.9282902444420971, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9283062281157928, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9143841728614055, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9148205155364358, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9429357495928096, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.8199038085123204, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9007500710615358, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9335504867261654, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.8519148326217993, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9467340802817513, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.8914166352994622, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.8697448206881571, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.9091527400737927, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9487286082082608, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.9550331732946552, "sentence_nr": 27 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 27 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9703747509928279, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9540941235545723, "sentence_nr": 28 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9543144589160125, "sentence_nr": 28 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.652649628941592, "sentence_nr": 28 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9767775472269087, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9393628940364738, "sentence_nr": 28 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9410712595774171, "sentence_nr": 28 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.6444379795256558, "sentence_nr": 28 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8964898605551818, "sentence_nr": 28 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9263597385884417, "sentence_nr": 28 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.899546929868499, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.899546929868499, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9256238040654331, "sentence_nr": 28 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9256238040654331, "sentence_nr": 28 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 28 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.824741266541094, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 28 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.9256238040654331, "sentence_nr": 28 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8283905649271065, "sentence_nr": 28 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.0, "sentence_nr": 28 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.881413837458117, "sentence_nr": 28 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.899546929868499, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9437940294094723, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.9269703177791706, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.944904344834561, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.8479413107328494, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 0.9715595760527852, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 0.8595969327963556, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 0.9538713542813556, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8348508116391393, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 0.9572462820044535, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9375119517314923, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.9493167367596885, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 0.9344916654109876, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 0.9849529115133767, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9275259780895282, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9276874028790393, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 0.9473074618830379, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 0.9460494618521745, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.8923268998495886, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 0.9455007606735264, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 0.9526558782357073, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9338345156544289, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 0.8995764072227389, "sentence_nr": 28 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.9505226544098013, "sentence_nr": 28 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9630841609539229, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9451142647196181, "sentence_nr": 29 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.7510122845400926, "sentence_nr": 29 }, { "model": "mistralai/mistral-nemo", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.8090165300577936, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "google/gemma-3-27b-it", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "qwen/qwq-32b", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 0.9543128468386116, "sentence_nr": 29 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "amazon/nova-micro-v1", "bcp_47": "en", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.920197561569537, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.1-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8759929746436435, "sentence_nr": 29 }, { "model": "meta-llama/llama-3-70b-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "mistralai/mistral-small-24b-instruct-2501", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8935424392990651, "sentence_nr": 29 }, { "model": "mistralai/mistral-nemo", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.7769676399488106, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8875472267363329, "sentence_nr": 29 }, { "model": "google/gemini-2.0-flash-lite-001", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "google/gemma-3-27b-it", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "qwen/qwq-32b", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8664932988313133, "sentence_nr": 29 }, { "model": "microsoft/phi-4-multimodal-instruct", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.906379768806771, "sentence_nr": 29 }, { "model": "amazon/nova-micro-v1", "bcp_47": "zh", "task": "language_modeling", "metric": "chrf", "score": 0.8001297194719582, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "language_modeling", "metric": "chrf", "score": 0.9048724843551281, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "es", "task": "language_modeling", "metric": "chrf", "score": 0.8947987168857687, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ar", "task": "language_modeling", "metric": "chrf", "score": 0.9489238765618674, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ur", "task": "language_modeling", "metric": "chrf", "score": 0.945278116491169, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "bn", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pt", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "pa", "task": "language_modeling", "metric": "chrf", "score": 0.8989194854163256, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ru", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "sw", "task": "language_modeling", "metric": "chrf", "score": 0.9061728639858796, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "id", "task": "language_modeling", "metric": "chrf", "score": 0.9501419212325259, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "de", "task": "language_modeling", "metric": "chrf", "score": 0.891206254843651, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ja", "task": "language_modeling", "metric": "chrf", "score": 0.9331628274049639, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "te", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "mr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "jv", "task": "language_modeling", "metric": "chrf", "score": 0.9046319474149982, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "vi", "task": "language_modeling", "metric": "chrf", "score": 0.9463095328863311, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ta", "task": "language_modeling", "metric": "chrf", "score": 0.9085828484030862, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fa", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "tr", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "yue", "task": "language_modeling", "metric": "chrf", "score": 0.8856061163721227, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "ko", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "it", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "fil", "task": "language_modeling", "metric": "chrf", "score": 0.9661878700572512, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "arz", "task": "language_modeling", "metric": "chrf", "score": 1.0, "sentence_nr": 29 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "gu", "task": "language_modeling", "metric": "chrf", "score": 0.908669313428767, "sentence_nr": 29 } ]